## **Chapter One**

### Simple Expressions

2 + 4 # Addition
## [1] 6
4 / 2 # Division
## [1] 2
3 * 4 # Multiplication
## [1] 12
45 - 13 # Subtraction
## [1] 32
4L / 5L # Integer Division
## [1] 0.8
4 %% 3 # for integer division with remainder
## [1] 1
2 ^ 5 # Exponential
## [1] 32
3 ** 4 # Indices
## [1] 81
9 ** 3 # Indices
## [1] 729
s = "Hello Charles, I hope you are working smart to achieve your dream?"
s
## [1] "Hello Charles, I hope you are working smart to achieve your dream?"
e <- 34 # Assigning variable (e) to  value (34)
e
## [1] 34
nchar(e) # number of characters
## [1] 2
3 -> r # Assigning variable (r) to  value (3)
r
## [1] 3
(u <- "Nana Kwame") # parentheses form will print an output
## [1] "Nana Kwame"
nchar(u) # number of characters
## [1] 10
q = seq( 1,50,2) # Start from 1 and end at 50 with a step of 2
q
##  [1]  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49
q[23] # checking for index of the sequence
## [1] 45
q[20] # checking for index of the sequence
## [1] 39
q[2:9] # checking form index 2 to index 9
## [1]  3  5  7  9 11 13 15 17
q ** 2 - r
##  [1]   -2    6   22   46   78  118  166  222  286  358  438  526  622  726  838
## [16]  958 1086 1222 1366 1518 1678 1846 2022 2206 2398
v <- c("A" = 1, "B" = 2, "C" = 3) # Creating keys and values
v
## A B C 
## 1 2 3
names(v) <- c("x", "y", "z") # Assigning new keys to the values
v
## x y z 
## 1 2 3
v["x"] # checking the key x
## x 
## 1
v["y"] # checking the key y
## y 
## 2
(x <- 1:4) # parentheses form will print an output
## [1] 1 2 3 4
(y <- 4:7) # parentheses form will print an output
## [1] 4 5 6 7
x - y 
## [1] -3 -3 -3 -3
#?nchar

#?`+`

square <- function(x) x**2
square(1:4)
## [1]  1  4  9 16
cube = function(x) x**3
cube(1:9)
## [1]   1   8  27  64 125 216 343 512 729
square_and_subtract <- function(x, y) return(x ** 2 - y)
square_and_subtract(30,30)
## [1] 870
sum(1:4)
## [1] 10
1 : 4
## [1] 1 2 3 4
sqrt(1:4)
## [1] 1.000000 1.414214 1.732051 2.000000
average <- function(x) {
 n <- length(x)
 sum(x) / n
}
average(1:9)
## [1] 5
mean(1:9)
## [1] 5
mean <- function(x) sum(x) / length(x)

mean(2:24)
## [1] 13
### **Controls loops** 

if (3 > 2) "true"
## [1] "true"
if (2 < 3) "false" else "true"
## [1] "false"
if (2 > 3) "bar" else "baz"
## [1] "baz"
if (5 > 4) "Charles" else "Nana"
## [1] "Charles"
if (2 > 3) {
 x <- "bar"
}else "baz"
## [1] "baz"
x <- 1:5
ifelse(x > 3, "bar", "baz")
## [1] "baz" "baz" "baz" "bar" "bar"
maybe_square <- function(x) {
 ifelse (x %% 2 == 0, x ** 2, x)
}
maybe_square(1:9)
## [1]  1  4  3 16  5 36  7 64  9
xo <- 1:5 # Assigning values (1 to 5) to variable xo
total <- 0
for (element in xo) total <- total + element
total
## [1] 15
xo
## [1] 1 2 3 4 5
x <- 1:5
total <- 0
for (index in seq_along(x)) {
 element <- x[index]
 total <- total + element
}
total
## [1] 15
x = -1:-5 # Assigning values (-1 to - 5) to variable x
total <- 9
index <- 1
while (index <= length(x)) {
 element <- x[index]
 index <- index + 1
 total <- total + element
}
total
## [1] -6
x
## [1] -1 -2 -3 -4 -5
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      -5      -4      -3      -3      -2      -1
### Factors
f <- factor(c("small", "small", "medium",
 "large", "small", "large"))
f
## [1] small  small  medium large  small  large 
## Levels: large medium small
ordered(f)
## [1] small  small  medium large  small  large 
## Levels: large < medium < small
summary(f)
##  large medium  small 
##      2      1      3
ff <- factor(c("small", "small", "medium",
 "large", "small", "large"),
 levels = c("small", "medium", "large")) # changing the levels of ff
ff
## [1] small  small  medium large  small  large 
## Levels: small medium large
ordered(ff)
## [1] small  small  medium large  small  large 
## Levels: small < medium < large
summary(ff)
##  small medium  large 
##      3      1      2
f <- factor(c("small", "small", "medium",
 "large", "small", "large"),
 levels = c("medium", "small", "large"),
 ordered = TRUE)
f
## [1] small  small  medium large  small  large 
## Levels: medium < small < large
summary(f)
## medium  small  large 
##      1      3      2
v <- 1:26
names(v) <- LETTERS[1:26] # Extracting the name of the letters with numbers
v
##  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z 
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
(ffo <- factor(LETTERS[1:4]))
## [1] A B C D
## Levels: A B C D
v[ffo] # Assigning values to the letters
## A B C D 
## 1 2 3 4
(ff <- factor(LETTERS[1:4], levels = rev(LETTERS[1:4]))) 
## [1] A B C D
## Levels: D C B A
v[ff]
## D C B A 
## 4 3 2 1
as.numeric(ff) # Number form
## [1] 4 3 2 1
as.vector(ff) # String form
## [1] "A" "B" "C" "D"
v[as.vector(ff)]
## A B C D 
## 1 2 3 4
### Data Frames
df <- data.frame(a = 1:4, b = letters[1:4]) # Creating a data frame
df
##   a b
## 1 1 a
## 2 2 b
## 3 3 c
## 4 4 d
df[1,1] # row one , column one
## [1] 1
df[1,2] # row one , column two
## [1] "a"
df[2,1] # row two , column one
## [1] 2
df[2,2] # row two , column two
## [1] "b"
df[1,] # All row one values
##   a b
## 1 1 a
df[,1] # All column one values
## [1] 1 2 3 4
df[,"a"] # using the column name
## [1] 1 2 3 4
df$b # using the column name
## [1] "a" "b" "c" "d"
df2 <- data.frame(a = 5:7, b = letters[5:7])
rbind(df, df2) ### Combining them using their rows
##   a b
## 1 1 a
## 2 2 b
## 3 3 c
## 4 4 d
## 5 5 e
## 6 6 f
## 7 7 g
df3 <- data.frame(c = 5:8, d = letters[5:8])
cbind(df, df3) ### Combining them using their columns
##   a b c d
## 1 1 a 5 e
## 2 2 b 6 f
## 3 3 c 7 g
## 4 4 d 8 h
df4 = data.frame(Random = rnorm(26), Alphabets =  letters)
df4
##        Random Alphabets
## 1   0.4516460         a
## 2   0.2163044         b
## 3   0.6436671         c
## 4   0.4485669         d
## 5  -1.8389144         e
## 6  -0.7360462         f
## 7   0.1452918         g
## 8   0.1854455         h
## 9  -0.0360053         i
## 10  0.1750531         j
## 11  1.0541459         k
## 12 -0.3875946         l
## 13 -0.8633137         m
## 14 -1.1431215         n
## 15  0.2610843         o
## 16 -0.7335032         p
## 17 -0.1958796         q
## 18  0.6539016         r
## 19 -1.4845533         s
## 20 -1.2848276         t
## 21 -0.2667454         u
## 22 -0.4209919         v
## 23  2.5683044         w
## 24  0.6854559         x
## 25 -0.4637876         y
## 26 -0.4577713         z
### Missing Values
NA + 5
## [1] NA
is.na(NA) # checking for a missing number
## [1] TRUE
is.na(4) # checking if 4 is a missing number
## [1] FALSE
v <- c(1:100,NA)
sum(v)
## [1] NA
sum(v, na.rm = TRUE)
## [1] 5050
### **R Packages**
#install.packages("magrittr")
library(magrittr)
### Data Pipeline
subsample_rows <- function(d, n) {
 rows <- sample(nrow(d), n)
 d[rows,]
}

d <- data.frame(x = rnorm(100), y = rnorm(100))

d %>% subsample_rows(n = 3)
##              x         y
## 56  0.92539129 0.3910950
## 65  0.09901638 0.3875896
## 96 -0.50833536 0.9330080
do <- data.frame(x = rnorm(100), y = rnorm(100))
do %>% lm(y ~ x, data = .)
## 
## Call:
## lm(formula = y ~ x, data = .)
## 
## Coefficients:
## (Intercept)            x  
##    -0.25883      0.08946
so <- data.frame(x = 1:10, y = 11:20)
so %>% lm(y ~ x, data = .)
## 
## Call:
## lm(formula = y ~ x, data = .)
## 
## Coefficients:
## (Intercept)            x  
##          10            1
#?lm()

rnorm(4) %>% data.frame(x = ., is_negative = . < 0)
##            x is_negative
## 1  0.4693503       FALSE
## 2 -0.6277748        TRUE
## 3 -0.6851523        TRUE
## 4 -0.4342983        TRUE
rnorm(4) %>% data.frame(x = ., y = abs(.))
##            x         y
## 1  0.9116599 0.9116599
## 2  1.1205732 1.1205732
## 3 -1.1213551 1.1213551
## 4  1.9892887 1.9892887
rnorm(4) %>% data.frame(x = sin(.), y = cos(.))
##            .          x            y
## 1 -0.9570749 -0.8175104  0.575913785
## 2 -0.8458195 -0.7485148  0.663118106
## 3  0.2112488  0.2096811  0.977769828
## 4 -1.5766002 -0.9999832 -0.005803872
#sin(1.03130453)
#cos(1.03130453)
f <- function(x, y) {
 y <- sum(cos(x),sin(y)) 
 z <- sum(sin(x),cos(y))
 z
}

f(2,4)
## [1] 1.296732
plot_and_fit <- function(d) {
 plot(y ~ x, data = d)
 abline(lm(y ~ x, data = d))
}

x <- rnorm(20)
y <- x + rnorm(20)
data.frame(x, y) %>% plot_and_fit

data.frame(x, y) %>% (function(d) {
 plot(y ~ x, data = d)
 abline(lm(y ~ x, data = d))
})

data.frame(x, y) %>% {
 plot(y ~ x, data = .)
 abline(lm(y ~ x, data = .))
}

d <- data.frame(x = rnorm(100), y = 4 + rnorm(100))

#d %>% {data.frame(mean_x = mean(.$x), mean_y = mean(.$y))}

d %$% data.frame(mean_x = mean(x), mean_y = mean(y))
##        mean_x   mean_y
## 1 -0.08483521 4.043954
d <- data.frame(x = rnorm(10), y = rnorm(10))

d %T>% plot(y ~ x, data = .) %>% lm(y ~ x, data = .)

## 
## Call:
## lm(formula = y ~ x, data = .)
## 
## Coefficients:
## (Intercept)            x  
##      0.1483       0.1683
## **Chapter Three**

### Data Manipulation
library(datasets)
library(ggplot2)
#library(help = "datasets")

data(cars)
head(cars)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
cars %>% head(3) ### First 3 rows of the cars datasets
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
cars %>% tail(3) ### Last 3 rows of the cars datasets
##    speed dist
## 48    24   93
## 49    24  120
## 50    25   85
cars %>% summary ### Summary of the cars datasets
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
cars %>% qplot(speed, dist, data = .)

#install.packages("mlbench")
library(mlbench)
#library(help = "mlbench")

data(iris)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
iris %>% head(5)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
iris %>% summary ### Summary of the iris datasets
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
iris %>% qplot(Sepal.Length, Sepal.Width, data = ., color = Species)

iris %>% qplot(Petal.Length, Petal.Width, data = ., color = Species)

#?read.table
#### **Breast Cancer**
data(BreastCancer)
BreastCancer %>% head()
##        Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
## 1 1000025            5         1          1             1            2
## 2 1002945            5         4          4             5            7
## 3 1015425            3         1          1             1            2
## 4 1016277            6         8          8             1            3
## 5 1017023            4         1          1             3            2
## 6 1017122            8        10         10             8            7
##   Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses     Class
## 1           1           3               1       1    benign
## 2          10           3               2       1    benign
## 3           2           3               1       1    benign
## 4           4           3               7       1    benign
## 5           1           3               1       1    benign
## 6          10           9               7       1 malignant
data_url = "http://tinyurl.com/kw4xtts"
lines <- readLines(data_url)
lines[1:5]
## [1] "1000025,5,1,1,1,2,1,3,1,1,2"  "1002945,5,4,4,5,7,10,3,2,1,2"
## [3] "1015425,3,1,1,1,2,2,3,1,1,2"  "1016277,6,8,8,1,3,4,3,7,1,2" 
## [5] "1017023,4,1,1,3,2,1,3,1,1,2"
raw_breast_cancer = read.csv("C:/Users/HP/Desktop/Data Science/Machine learning/breast+cancer+wisconsin+original/breast-cancer-wisconsin.data")
raw_breast_cancer  %>% head(6)
##   X1000025 X5 X1 X1.1 X1.2 X2 X1.3 X3 X1.4 X1.5 X2.1
## 1  1002945  5  4    4    5  7   10  3    2    1    2
## 2  1015425  3  1    1    1  2    2  3    1    1    2
## 3  1016277  6  8    8    1  3    4  3    7    1    2
## 4  1017023  4  1    1    3  2    1  3    1    1    2
## 5  1017122  8 10   10    8  7   10  9    7    1    4
## 6  1018099  1  1    1    1  2   10  3    1    1    2
raw_breast_cancer <- read.csv(data_url)
raw_breast_cancer %>% head()
##   X1000025 X5 X1 X1.1 X1.2 X2 X1.3 X3 X1.4 X1.5 X2.1
## 1  1002945  5  4    4    5  7   10  3    2    1    2
## 2  1015425  3  1    1    1  2    2  3    1    1    2
## 3  1016277  6  8    8    1  3    4  3    7    1    2
## 4  1017023  4  1    1    3  2    1  3    1    1    2
## 5  1017122  8 10   10    8  7   10  9    7    1    4
## 6  1018099  1  1    1    1  2   10  3    1    1    2
raw_breast_cancer <- read.csv(data_url, header = FALSE)
raw_breast_cancer %>% head()
##        V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1000025  5  1  1  1  2  1  3  1   1   2
## 2 1002945  5  4  4  5  7 10  3  2   1   2
## 3 1015425  3  1  1  1  2  2  3  1   1   2
## 4 1016277  6  8  8  1  3  4  3  7   1   2
## 5 1017023  4  1  1  3  2  1  3  1   1   2
## 6 1017122  8 10 10  8  7 10  9  7   1   4
names(raw_breast_cancer) <- names(BreastCancer)
raw_breast_cancer %>% head()
##        Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
## 1 1000025            5         1          1             1            2
## 2 1002945            5         4          4             5            7
## 3 1015425            3         1          1             1            2
## 4 1016277            6         8          8             1            3
## 5 1017023            4         1          1             3            2
## 6 1017122            8        10         10             8            7
##   Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses Class
## 1           1           3               1       1     2
## 2          10           3               2       1     2
## 3           2           3               1       1     2
## 4           4           3               7       1     2
## 5           1           3               1       1     2
## 6          10           9               7       1     4
raw_breast_cancer <- read.csv(data_url, header = FALSE,
 col.names = names(BreastCancer))
raw_breast_cancer %>% head()
##        Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
## 1 1000025            5         1          1             1            2
## 2 1002945            5         4          4             5            7
## 3 1015425            3         1          1             1            2
## 4 1016277            6         8          8             1            3
## 5 1017023            4         1          1             3            2
## 6 1017122            8        10         10             8            7
##   Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses Class
## 1           1           3               1       1     2
## 2          10           3               2       1     2
## 3           2           3               1       1     2
## 4           4           3               7       1     2
## 5           1           3               1       1     2
## 6          10           9               7       1     4
formatted_breast_cancer <- raw_breast_cancer

#mapped <- formatted_breast_cancer$Class %>% map_class
#mapped %>% table

map_class <- function(x) {
 ifelse(x == 2, "bening", "malignant")
}
mapped <- formatted_breast_cancer$Class %>% map_class
mapped %>% table
## .
##    bening malignant 
##       458       241
formatted_breast_cancer$Class %>% unique
## [1] 2 4
#dict <- c("2" = "benign", "4" = "malignant")
#map_class <- function(x) dict[as.character(x)]

#mapped <- formatted_breast_cancer$Class %>% map_class
#mapped %>% table
mapped[1:20]
##  [1] "bening"    "bening"    "bening"    "bening"    "bening"    "malignant"
##  [7] "bening"    "bening"    "bening"    "bening"    "bening"    "bening"   
## [13] "malignant" "bening"    "malignant" "malignant" "bening"    "bening"   
## [19] "malignant" "bening"
#mapped %<>% unname
#mapped[1:20]

read = read.csv(data_url, header = FALSE,
 col.names = names(BreastCancer)) ->
 raw_breast_cancer ->
 formatted_breast_cancer
dict <- c("2" = "benign", "4" = "malignant")
map_class <- function(x) dict[as.character(x)]
formatted_breast_cancer$Class <-
 formatted_breast_cancer$Class %>%
 map_class %>%
 unname %>%
 factor(levels = c("benign", "malignant"))

read
##           Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
## 1    1000025            5         1          1             1            2
## 2    1002945            5         4          4             5            7
## 3    1015425            3         1          1             1            2
## 4    1016277            6         8          8             1            3
## 5    1017023            4         1          1             3            2
## 6    1017122            8        10         10             8            7
## 7    1018099            1         1          1             1            2
## 8    1018561            2         1          2             1            2
## 9    1033078            2         1          1             1            2
## 10   1033078            4         2          1             1            2
## 11   1035283            1         1          1             1            1
## 12   1036172            2         1          1             1            2
## 13   1041801            5         3          3             3            2
## 14   1043999            1         1          1             1            2
## 15   1044572            8         7          5            10            7
## 16   1047630            7         4          6             4            6
## 17   1048672            4         1          1             1            2
## 18   1049815            4         1          1             1            2
## 19   1050670           10         7          7             6            4
## 20   1050718            6         1          1             1            2
## 21   1054590            7         3          2            10            5
## 22   1054593           10         5          5             3            6
## 23   1056784            3         1          1             1            2
## 24   1057013            8         4          5             1            2
## 25   1059552            1         1          1             1            2
## 26   1065726            5         2          3             4            2
## 27   1066373            3         2          1             1            1
## 28   1066979            5         1          1             1            2
## 29   1067444            2         1          1             1            2
## 30   1070935            1         1          3             1            2
## 31   1070935            3         1          1             1            1
## 32   1071760            2         1          1             1            2
## 33   1072179           10         7          7             3            8
## 34   1074610            2         1          1             2            2
## 35   1075123            3         1          2             1            2
## 36   1079304            2         1          1             1            2
## 37   1080185           10        10         10             8            6
## 38   1081791            6         2          1             1            1
## 39   1084584            5         4          4             9            2
## 40   1091262            2         5          3             3            6
## 41   1096800            6         6          6             9            6
## 42   1099510           10         4          3             1            3
## 43   1100524            6        10         10             2            8
## 44   1102573            5         6          5             6           10
## 45   1103608           10        10         10             4            8
## 46   1103722            1         1          1             1            2
## 47   1105257            3         7          7             4            4
## 48   1105524            1         1          1             1            2
## 49   1106095            4         1          1             3            2
## 50   1106829            7         8          7             2            4
## 51   1108370            9         5          8             1            2
## 52   1108449            5         3          3             4            2
## 53   1110102           10         3          6             2            3
## 54   1110503            5         5          5             8           10
## 55   1110524           10         5          5             6            8
## 56   1111249           10         6          6             3            4
## 57   1112209            8        10         10             1            3
## 58   1113038            8         2          4             1            5
## 59   1113483            5         2          3             1            6
## 60   1113906            9         5          5             2            2
## 61   1115282            5         3          5             5            3
## 62   1115293            1         1          1             1            2
## 63   1116116            9        10         10             1           10
## 64   1116132            6         3          4             1            5
## 65   1116192            1         1          1             1            2
## 66   1116998           10         4          2             1            3
## 67   1117152            4         1          1             1            2
## 68   1118039            5         3          4             1            8
## 69   1120559            8         3          8             3            4
## 70   1121732            1         1          1             1            2
## 71   1121919            5         1          3             1            2
## 72   1123061            6        10          2             8           10
## 73   1124651            1         3          3             2            2
## 74   1125035            9         4          5            10            6
## 75   1126417           10         6          4             1            3
## 76   1131294            1         1          2             1            2
## 77   1132347            1         1          4             1            2
## 78   1133041            5         3          1             2            2
## 79   1133136            3         1          1             1            2
## 80   1136142            2         1          1             1            3
## 81   1137156            2         2          2             1            1
## 82   1143978            4         1          1             2            2
## 83   1143978            5         2          1             1            2
## 84   1147044            3         1          1             1            2
## 85   1147699            3         5          7             8            8
## 86   1147748            5        10          6             1           10
## 87   1148278            3         3          6             4            5
## 88   1148873            3         6          6             6            5
## 89   1152331            4         1          1             1            2
## 90   1155546            2         1          1             2            3
## 91   1156272            1         1          1             1            2
## 92   1156948            3         1          1             2            2
## 93   1157734            4         1          1             1            2
## 94   1158247            1         1          1             1            2
## 95   1160476            2         1          1             1            2
## 96   1164066            1         1          1             1            2
## 97   1165297            2         1          1             2            2
## 98   1165790            5         1          1             1            2
## 99   1165926            9         6          9             2           10
## 100  1166630            7         5          6            10            5
## 101  1166654           10         3          5             1           10
## 102  1167439            2         3          4             4            2
## 103  1167471            4         1          2             1            2
## 104  1168359            8         2          3             1            6
## 105  1168736           10        10         10            10           10
## 106  1169049            7         3          4             4            3
## 107  1170419           10        10         10             8            2
## 108  1170420            1         6          8            10            8
## 109  1171710            1         1          1             1            2
## 110  1171710            6         5          4             4            3
## 111  1171795            1         3          1             2            2
## 112  1171845            8         6          4             3            5
## 113  1172152           10         3          3            10            2
## 114  1173216           10        10         10             3           10
## 115  1173235            3         3          2             1            2
## 116  1173347            1         1          1             1            2
## 117  1173347            8         3          3             1            2
## 118  1173509            4         5          5            10            4
## 119  1173514            1         1          1             1            4
## 120  1173681            3         2          1             1            2
## 121  1174057            1         1          2             2            2
## 122  1174057            4         2          1             1            2
## 123  1174131           10        10         10             2           10
## 124  1174428            5         3          5             1            8
## 125  1175937            5         4          6             7            9
## 126  1176406            1         1          1             1            2
## 127  1176881            7         5          3             7            4
## 128  1177027            3         1          1             1            2
## 129  1177399            8         3          5             4            5
## 130  1177512            1         1          1             1           10
## 131  1178580            5         1          3             1            2
## 132  1179818            2         1          1             1            2
## 133  1180194            5        10          8            10            8
## 134  1180523            3         1          1             1            2
## 135  1180831            3         1          1             1            3
## 136  1181356            5         1          1             1            2
## 137  1182404            4         1          1             1            2
## 138  1182410            3         1          1             1            2
## 139  1183240            4         1          2             1            2
## 140  1183246            1         1          1             1            1
## 141  1183516            3         1          1             1            2
## 142  1183911            2         1          1             1            2
## 143  1183983            9         5          5             4            4
## 144  1184184            1         1          1             1            2
## 145  1184241            2         1          1             1            2
## 146  1184840            1         1          3             1            2
## 147  1185609            3         4          5             2            6
## 148  1185610            1         1          1             1            3
## 149  1187457            3         1          1             3            8
## 150  1187805            8         8          7             4           10
## 151  1188472            1         1          1             1            1
## 152  1189266            7         2          4             1            6
## 153  1189286           10        10          8             6            4
## 154  1190394            4         1          1             1            2
## 155  1190485            1         1          1             1            2
## 156  1192325            5         5          5             6            3
## 157  1193091            1         2          2             1            2
## 158  1193210            2         1          1             1            2
## 159  1193683            1         1          2             1            3
## 160  1196295            9         9         10             3            6
## 161  1196915           10         7          7             4            5
## 162  1197080            4         1          1             1            2
## 163  1197270            3         1          1             1            2
## 164  1197440            1         1          1             2            1
## 165  1197510            5         1          1             1            2
## 166  1197979            4         1          1             1            2
## 167  1197993            5         6          7             8            8
## 168  1198128           10         8         10            10            6
## 169  1198641            3         1          1             1            2
## 170  1199219            1         1          1             2            1
## 171  1199731            3         1          1             1            2
## 172  1199983            1         1          1             1            2
## 173  1200772            1         1          1             1            2
## 174  1200847            6        10         10            10            8
## 175  1200892            8         6          5             4            3
## 176  1200952            5         8          7             7           10
## 177  1201834            2         1          1             1            2
## 178  1201936            5        10         10             3            8
## 179  1202125            4         1          1             1            2
## 180  1202812            5         3          3             3            6
## 181  1203096            1         1          1             1            1
## 182  1204242            1         1          1             1            2
## 183  1204898            6         1          1             1            2
## 184  1205138            5         8          8             8            5
## 185  1205579            8         7          6             4            4
## 186  1206089            2         1          1             1            1
## 187  1206695            1         5          8             6            5
## 188  1206841           10         5          6            10            6
## 189  1207986            5         8          4            10            5
## 190  1208301            1         2          3             1            2
## 191  1210963           10        10         10             8            6
## 192  1211202            7         5         10            10           10
## 193  1212232            5         1          1             1            2
## 194  1212251            1         1          1             1            2
## 195  1212422            3         1          1             1            2
## 196  1212422            4         1          1             1            2
## 197  1213375            8         4          4             5            4
## 198  1213383            5         1          1             4            2
## 199  1214092            1         1          1             1            2
## 200  1214556            3         1          1             1            2
## 201  1214966            9         7          7             5            5
## 202  1216694           10         8          8             4           10
## 203  1216947            1         1          1             1            2
## 204  1217051            5         1          1             1            2
## 205  1217264            1         1          1             1            2
## 206  1218105            5        10         10             9            6
## 207  1218741           10        10          9             3            7
## 208  1218860            1         1          1             1            1
## 209  1218860            1         1          1             1            1
## 210  1219406            5         1          1             1            1
## 211  1219525            8        10         10            10            5
## 212  1219859            8        10          8             8            4
## 213  1220330            1         1          1             1            2
## 214  1221863           10        10         10            10            7
## 215  1222047           10        10         10            10            3
## 216  1222936            8         7          8             7            5
## 217  1223282            1         1          1             1            2
## 218  1223426            1         1          1             1            2
## 219  1223793            6        10          7             7            6
## 220  1223967            6         1          3             1            2
## 221  1224329            1         1          1             2            2
## 222  1225799           10         6          4             3           10
## 223  1226012            4         1          1             3            1
## 224  1226612            7         5          6             3            3
## 225  1227210           10         5          5             6            3
## 226  1227244            1         1          1             1            2
## 227  1227481           10         5          7             4            4
## 228  1228152            8         9          9             5            3
## 229  1228311            1         1          1             1            1
## 230  1230175           10        10         10             3           10
## 231  1230688            7         4          7             4            3
## 232  1231387            6         8          7             5            6
## 233  1231706            8         4          6             3            3
## 234  1232225           10         4          5             5            5
## 235  1236043            3         3          2             1            3
## 236  1241232            3         1          4             1            2
## 237  1241559           10         8          8             2            8
## 238  1241679            9         8          8             5            6
## 239  1242364            8        10         10             8            6
## 240  1243256           10         4          3             2            3
## 241  1270479            5         1          3             3            2
## 242  1276091            3         1          1             3            1
## 243  1277018            2         1          1             1            2
## 244   128059            1         1          1             1            2
## 245  1285531            1         1          1             1            2
## 246  1287775            5         1          1             2            2
## 247   144888            8        10         10             8            5
## 248   145447            8         4          4             1            2
## 249   167528            4         1          1             1            2
## 250   169356            3         1          1             1            2
## 251   183913            1         2          2             1            2
## 252   191250           10         4          4            10            2
## 253  1017023            6         3          3             5            3
## 254  1100524            6        10         10             2            8
## 255  1116116            9        10         10             1           10
## 256  1168736            5         6          6             2            4
## 257  1182404            3         1          1             1            2
## 258  1182404            3         1          1             1            2
## 259  1198641            3         1          1             1            2
## 260   242970            5         7          7             1            5
## 261   255644           10         5          8            10            3
## 262   263538            5        10         10             6           10
## 263   274137            8         8          9             4            5
## 264   303213           10         4          4            10            6
## 265   314428            7         9          4            10           10
## 266  1182404            5         1          4             1            2
## 267  1198641           10        10          6             3            3
## 268   320675            3         3          5             2            3
## 269   324427           10         8          8             2            3
## 270   385103            1         1          1             1            2
## 271   390840            8         4          7             1            3
## 272   411453            5         1          1             1            2
## 273   320675            3         3          5             2            3
## 274   428903            7         2          4             1            3
## 275   431495            3         1          1             1            2
## 276   432809            3         1          3             1            2
## 277   434518            3         1          1             1            2
## 278   452264            1         1          1             1            2
## 279   456282            1         1          1             1            2
## 280   476903           10         5          7             3            3
## 281   486283            3         1          1             1            2
## 282   486662            2         1          1             2            2
## 283   488173            1         4          3            10            4
## 284   492268           10         4          6             1            2
## 285   508234            7         4          5            10            2
## 286   527363            8        10         10            10            8
## 287   529329           10        10         10            10           10
## 288   535331            3         1          1             1            3
## 289   543558            6         1          3             1            4
## 290   555977            5         6          6             8            6
## 291   560680            1         1          1             1            2
## 292   561477            1         1          1             1            2
## 293   563649            8         8          8             1            2
## 294   601265           10         4          4             6            2
## 295   606140            1         1          1             1            2
## 296   606722            5         5          7             8            6
## 297   616240            5         3          4             3            4
## 298    61634            5         4          3             1            2
## 299   625201            8         2          1             1            5
## 300    63375            9         1          2             6            4
## 301   635844            8         4         10             5            4
## 302   636130            1         1          1             1            2
## 303   640744           10        10         10             7            9
## 304   646904            1         1          1             1            2
## 305   653777            8         3          4             9            3
## 306   659642           10         8          4             4            4
## 307   666090            1         1          1             1            2
## 308   666942            1         1          1             1            2
## 309   667204            7         8          7             6            4
## 310   673637            3         1          1             1            2
## 311   684955            2         1          1             1            3
## 312   688033            1         1          1             1            2
## 313   691628            8         6          4            10           10
## 314   693702            1         1          1             1            2
## 315   704097            1         1          1             1            1
## 316   704168            4         6          5             6            7
## 317   706426            5         5          5             2            5
## 318   709287            6         8          7             8            6
## 319   718641            1         1          1             1            5
## 320   721482            4         4          4             4            6
## 321   730881            7         6          3             2            5
## 322   733639            3         1          1             1            2
## 323   733639            3         1          1             1            2
## 324   733823            5         4          6            10            2
## 325   740492            1         1          1             1            2
## 326   743348            3         2          2             1            2
## 327   752904           10         1          1             1            2
## 328   756136            1         1          1             1            2
## 329   760001            8        10          3             2            6
## 330   760239           10         4          6             4            5
## 331    76389           10         4          7             2            2
## 332   764974            5         1          1             1            2
## 333   770066            5         2          2             2            2
## 334   785208            5         4          6             6            4
## 335   785615            8         6          7             3            3
## 336   792744            1         1          1             1            2
## 337   797327            6         5          5             8            4
## 338   798429            1         1          1             1            2
## 339   704097            1         1          1             1            1
## 340   806423            8         5          5             5            2
## 341   809912           10         3          3             1            2
## 342   810104            1         1          1             1            2
## 343   814265            2         1          1             1            2
## 344   814911            1         1          1             1            2
## 345   822829            7         6          4             8           10
## 346   826923            1         1          1             1            2
## 347   830690            5         2          2             2            3
## 348   831268            1         1          1             1            1
## 349   832226            3         4          4            10            5
## 350   832567            4         2          3             5            3
## 351   836433            5         1          1             3            2
## 352   837082            2         1          1             1            2
## 353   846832            3         4          5             3            7
## 354   850831            2         7         10            10            7
## 355   855524            1         1          1             1            2
## 356   857774            4         1          1             1            3
## 357   859164            5         3          3             1            3
## 358   859350            8        10         10             7           10
## 359   866325            8        10          5             3            8
## 360   873549           10         3          5             4            3
## 361   877291            6        10         10            10           10
## 362   877943            3        10          3            10            6
## 363   888169            3         2          2             1            4
## 364   888523            4         4          4             2            2
## 365   896404            2         1          1             1            2
## 366   897172            2         1          1             1            2
## 367    95719            6        10         10            10            8
## 368   160296            5         8          8            10            5
## 369   342245            1         1          3             1            2
## 370   428598            1         1          3             1            1
## 371   492561            4         3          2             1            3
## 372   493452            1         1          3             1            2
## 373   493452            4         1          2             1            2
## 374   521441            5         1          1             2            2
## 375   560680            3         1          2             1            2
## 376   636437            1         1          1             1            2
## 377   640712            1         1          1             1            2
## 378   654244            1         1          1             1            1
## 379   657753            3         1          1             4            3
## 380   685977            5         3          4             1            4
## 381   805448            1         1          1             1            2
## 382   846423           10         6          3             6            4
## 383  1002504            3         2          2             2            2
## 384  1022257            2         1          1             1            2
## 385  1026122            2         1          1             1            2
## 386  1071084            3         3          2             2            3
## 387  1080233            7         6          6             3            2
## 388  1114570            5         3          3             2            3
## 389  1114570            2         1          1             1            2
## 390  1116715            5         1          1             1            3
## 391  1131411            1         1          1             2            2
## 392  1151734           10         8          7             4            3
## 393  1156017            3         1          1             1            2
## 394  1158247            1         1          1             1            1
## 395  1158405            1         2          3             1            2
## 396  1168278            3         1          1             1            2
## 397  1176187            3         1          1             1            2
## 398  1196263            4         1          1             1            2
## 399  1196475            3         2          1             1            2
## 400  1206314            1         2          3             1            2
## 401  1211265            3        10          8             7            6
## 402  1213784            3         1          1             1            2
## 403  1223003            5         3          3             1            2
## 404  1223306            3         1          1             1            2
## 405  1223543            1         2          1             3            2
## 406  1229929            1         1          1             1            2
## 407  1231853            4         2          2             1            2
## 408  1234554            1         1          1             1            2
## 409  1236837            2         3          2             2            2
## 410  1237674            3         1          2             1            2
## 411  1238021            1         1          1             1            2
## 412  1238464            1         1          1             1            1
## 413  1238633           10        10         10             6            8
## 414  1238915            5         1          2             1            2
## 415  1238948            8         5          6             2            3
## 416  1239232            3         3          2             6            3
## 417  1239347            8         7          8             5           10
## 418  1239967            1         1          1             1            2
## 419  1240337            5         2          2             2            2
## 420  1253505            2         3          1             1            5
## 421  1255384            3         2          2             3            2
## 422  1257200           10        10         10             7           10
## 423  1257648            4         3          3             1            2
## 424  1257815            5         1          3             1            2
## 425  1257938            3         1          1             1            2
## 426  1258549            9        10         10            10           10
## 427  1258556            5         3          6             1            2
## 428  1266154            8         7          8             2            4
## 429  1272039            1         1          1             1            2
## 430  1276091            2         1          1             1            2
## 431  1276091            1         3          1             1            2
## 432  1276091            5         1          1             3            4
## 433  1277629            5         1          1             1            2
## 434  1293439            3         2          2             3            2
## 435  1293439            6         9          7             5            5
## 436  1294562           10         8         10             1            3
## 437  1295186           10        10         10             1            6
## 438   527337            4         1          1             1            2
## 439   558538            4         1          3             3            2
## 440   566509            5         1          1             1            2
## 441   608157           10         4          3            10            4
## 442   677910            5         2          2             4            2
## 443   734111            1         1          1             3            2
## 444   734111            1         1          1             1            2
## 445   780555            5         1          1             6            3
## 446   827627            2         1          1             1            2
## 447  1049837            1         1          1             1            2
## 448  1058849            5         1          1             1            2
## 449  1182404            1         1          1             1            1
## 450  1193544            5         7          9             8            6
## 451  1201870            4         1          1             3            1
## 452  1202253            5         1          1             1            2
## 453  1227081            3         1          1             3            2
## 454  1230994            4         5          5             8            6
## 455  1238410            2         3          1             1            3
## 456  1246562           10         2          2             1            2
## 457  1257470           10         6          5             8            5
## 458  1259008            8         8          9             6            6
## 459  1266124            5         1          2             1            2
## 460  1267898            5         1          3             1            2
## 461  1268313            5         1          1             3            2
## 462  1268804            3         1          1             1            2
## 463  1276091            6         1          1             3            2
## 464  1280258            4         1          1             1            2
## 465  1293966            4         1          1             1            2
## 466  1296572           10         9          8             7            6
## 467  1298416           10         6          6             2            4
## 468  1299596            6         6          6             5            4
## 469  1105524            4         1          1             1            2
## 470  1181685            1         1          2             1            2
## 471  1211594            3         1          1             1            1
## 472  1238777            6         1          1             3            2
## 473  1257608            6         1          1             1            1
## 474  1269574            4         1          1             1            2
## 475  1277145            5         1          1             1            2
## 476  1287282            3         1          1             1            2
## 477  1296025            4         1          2             1            2
## 478  1296263            4         1          1             1            2
## 479  1296593            5         2          1             1            2
## 480  1299161            4         8          7            10            4
## 481  1301945            5         1          1             1            1
## 482  1302428            5         3          2             4            2
## 483  1318169            9        10         10            10           10
## 484   474162            8         7          8             5            5
## 485   787451            5         1          2             1            2
## 486  1002025            1         1          1             3            1
## 487  1070522            3         1          1             1            1
## 488  1073960           10        10         10            10            6
## 489  1076352            3         6          4            10            3
## 490  1084139            6         3          2             1            3
## 491  1115293            1         1          1             1            2
## 492  1119189            5         8          9             4            3
## 493  1133991            4         1          1             1            1
## 494  1142706            5        10         10            10            6
## 495  1155967            5         1          2            10            4
## 496  1170945            3         1          1             1            1
## 497  1181567            1         1          1             1            1
## 498  1182404            4         2          1             1            2
## 499  1204558            4         1          1             1            2
## 500  1217952            4         1          1             1            2
## 501  1224565            6         1          1             1            2
## 502  1238186            4         1          1             1            2
## 503  1253917            4         1          1             2            2
## 504  1265899            4         1          1             1            2
## 505  1268766            1         1          1             1            2
## 506  1277268            3         3          1             1            2
## 507  1286943            8        10         10            10            7
## 508  1295508            1         1          1             1            2
## 509  1297327            5         1          1             1            2
## 510  1297522            2         1          1             1            2
## 511  1298360            1         1          1             1            2
## 512  1299924            5         1          1             1            2
## 513  1299994            5         1          1             1            2
## 514  1304595            3         1          1             1            1
## 515  1306282            6         6          7            10            3
## 516  1313325            4        10          4             7            3
## 517  1320077            1         1          1             1            1
## 518  1320077            1         1          1             1            1
## 519  1320304            3         1          2             2            2
## 520  1330439            4         7          8             3            4
## 521   333093            1         1          1             1            3
## 522   369565            4         1          1             1            3
## 523   412300           10         4          5             4            3
## 524   672113            7         5          6            10            4
## 525   749653            3         1          1             1            2
## 526   769612            3         1          1             2            2
## 527   769612            4         1          1             1            2
## 528   798429            4         1          1             1            2
## 529   807657            6         1          3             2            2
## 530  8233704            4         1          1             1            1
## 531   837480            7         4          4             3            4
## 532   867392            4         2          2             1            2
## 533   869828            1         1          1             1            1
## 534  1043068            3         1          1             1            2
## 535  1056171            2         1          1             1            2
## 536  1061990            1         1          3             2            2
## 537  1113061            5         1          1             1            2
## 538  1116192            5         1          2             1            2
## 539  1135090            4         1          1             1            2
## 540  1145420            6         1          1             1            2
## 541  1158157            5         1          1             1            2
## 542  1171578            3         1          1             1            2
## 543  1174841            5         3          1             1            2
## 544  1184586            4         1          1             1            2
## 545  1186936            2         1          3             2            2
## 546  1197527            5         1          1             1            2
## 547  1222464            6        10         10            10            4
## 548  1240603            2         1          1             1            1
## 549  1240603            3         1          1             1            1
## 550  1241035            7         8          3             7            4
## 551  1287971            3         1          1             1            2
## 552  1289391            1         1          1             1            2
## 553  1299924            3         2          2             2            2
## 554  1306339            4         4          2             1            2
## 555  1313658            3         1          1             1            2
## 556  1313982            4         3          1             1            2
## 557  1321264            5         2          2             2            1
## 558  1321321            5         1          1             3            2
## 559  1321348            2         1          1             1            2
## 560  1321931            5         1          1             1            2
## 561  1321942            5         1          1             1            2
## 562  1321942            5         1          1             1            2
## 563  1328331            1         1          1             1            2
## 564  1328755            3         1          1             1            2
## 565  1331405            4         1          1             1            2
## 566  1331412            5         7         10            10            5
## 567  1333104            3         1          2             1            2
## 568  1334071            4         1          1             1            2
## 569  1343068            8         4          4             1            6
## 570  1343374           10        10          8            10            6
## 571  1344121            8        10          4             4            8
## 572   142932            7         6         10             5            3
## 573   183936            3         1          1             1            2
## 574   324382            1         1          1             1            2
## 575   378275           10         9          7             3            4
## 576   385103            5         1          2             1            2
## 577   690557            5         1          1             1            2
## 578   695091            1         1          1             1            2
## 579   695219            1         1          1             1            2
## 580   824249            1         1          1             1            2
## 581   871549            5         1          2             1            2
## 582   878358            5         7         10             6            5
## 583  1107684            6        10          5             5            4
## 584  1115762            3         1          1             1            2
## 585  1217717            5         1          1             6            3
## 586  1239420            1         1          1             1            2
## 587  1254538            8        10         10            10            6
## 588  1261751            5         1          1             1            2
## 589  1268275            9         8          8             9            6
## 590  1272166            5         1          1             1            2
## 591  1294261            4        10          8             5            4
## 592  1295529            2         5          7             6            4
## 593  1298484           10         3          4             5            3
## 594  1311875            5         1          2             1            2
## 595  1315506            4         8          6             3            4
## 596  1320141            5         1          1             1            2
## 597  1325309            4         1          2             1            2
## 598  1333063            5         1          3             1            2
## 599  1333495            3         1          1             1            2
## 600  1334659            5         2          4             1            1
## 601  1336798            3         1          1             1            2
## 602  1344449            1         1          1             1            1
## 603  1350568            4         1          1             1            2
## 604  1352663            5         4          6             8            4
## 605   188336            5         3          2             8            5
## 606   352431           10         5         10             3            5
## 607   353098            4         1          1             2            2
## 608   411453            1         1          1             1            2
## 609   557583            5        10         10            10           10
## 610   636375            5         1          1             1            2
## 611   736150           10         4          3            10            3
## 612   803531            5        10         10            10            5
## 613   822829            8        10         10            10            6
## 614  1016634            2         3          1             1            2
## 615  1031608            2         1          1             1            1
## 616  1041043            4         1          3             1            2
## 617  1042252            3         1          1             1            2
## 618  1057067            1         1          1             1            1
## 619  1061990            4         1          1             1            2
## 620  1073836            5         1          1             1            2
## 621  1083817            3         1          1             1            2
## 622  1096352            6         3          3             3            3
## 623  1140597            7         1          2             3            2
## 624  1149548            1         1          1             1            2
## 625  1174009            5         1          1             2            1
## 626  1183596            3         1          3             1            3
## 627  1190386            4         6          6             5            7
## 628  1190546            2         1          1             1            2
## 629  1213273            2         1          1             1            2
## 630  1218982            4         1          1             1            2
## 631  1225382            6         2          3             1            2
## 632  1235807            5         1          1             1            2
## 633  1238777            1         1          1             1            2
## 634  1253955            8         7          4             4            5
## 635  1257366            3         1          1             1            2
## 636  1260659            3         1          4             1            2
## 637  1268952           10        10          7             8            7
## 638  1275807            4         2          4             3            2
## 639  1277792            4         1          1             1            2
## 640  1277792            5         1          1             3            2
## 641  1285722            4         1          1             3            2
## 642  1288608            3         1          1             1            2
## 643  1290203            3         1          1             1            2
## 644  1294413            1         1          1             1            2
## 645  1299596            2         1          1             1            2
## 646  1303489            3         1          1             1            2
## 647  1311033            1         2          2             1            2
## 648  1311108            1         1          1             3            2
## 649  1315807            5        10         10            10           10
## 650  1318671            3         1          1             1            2
## 651  1319609            3         1          1             2            3
## 652  1323477            1         2          1             3            2
## 653  1324572            5         1          1             1            2
## 654  1324681            4         1          1             1            2
## 655  1325159            3         1          1             1            2
## 656  1326892            3         1          1             1            2
## 657  1330361            5         1          1             1            2
## 658  1333877            5         4          5             1            8
## 659  1334015            7         8          8             7            3
## 660  1334667            1         1          1             1            2
## 661  1339781            1         1          1             1            2
## 662  1339781            4         1          1             1            2
## 663 13454352            1         1          3             1            2
## 664  1345452            1         1          3             1            2
## 665  1345593            3         1          1             3            2
## 666  1347749            1         1          1             1            2
## 667  1347943            5         2          2             2            2
## 668  1348851            3         1          1             1            2
## 669  1350319            5         7          4             1            6
## 670  1350423            5        10         10             8            5
## 671  1352848            3        10          7             8            5
## 672  1353092            3         2          1             2            2
## 673  1354840            2         1          1             1            2
## 674  1354840            5         3          2             1            3
## 675  1355260            1         1          1             1            2
## 676  1365075            4         1          4             1            2
## 677  1365328            1         1          2             1            2
## 678  1368267            5         1          1             1            2
## 679  1368273            1         1          1             1            2
## 680  1368882            2         1          1             1            2
## 681  1369821           10        10         10            10            5
## 682  1371026            5        10         10            10            4
## 683  1371920            5         1          1             1            2
## 684   466906            1         1          1             1            2
## 685   466906            1         1          1             1            2
## 686   534555            1         1          1             1            2
## 687   536708            1         1          1             1            2
## 688   566346            3         1          1             1            2
## 689   603148            4         1          1             1            2
## 690   654546            1         1          1             1            2
## 691   654546            1         1          1             3            2
## 692   695091            5        10         10             5            4
## 693   714039            3         1          1             1            2
## 694   763235            3         1          1             1            2
## 695   776715            3         1          1             1            3
## 696   841769            2         1          1             1            2
## 697   888820            5        10         10             3            7
## 698   897471            4         8          6             4            3
## 699   897471            4         8          8             5            4
##     Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses Class
## 1             1           3               1       1     2
## 2            10           3               2       1     2
## 3             2           3               1       1     2
## 4             4           3               7       1     2
## 5             1           3               1       1     2
## 6            10           9               7       1     4
## 7            10           3               1       1     2
## 8             1           3               1       1     2
## 9             1           1               1       5     2
## 10            1           2               1       1     2
## 11            1           3               1       1     2
## 12            1           2               1       1     2
## 13            3           4               4       1     4
## 14            3           3               1       1     2
## 15            9           5               5       4     4
## 16            1           4               3       1     4
## 17            1           2               1       1     2
## 18            1           3               1       1     2
## 19           10           4               1       2     4
## 20            1           3               1       1     2
## 21           10           5               4       4     4
## 22            7           7              10       1     4
## 23            1           2               1       1     2
## 24            ?           7               3       1     4
## 25            1           3               1       1     2
## 26            7           3               6       1     4
## 27            1           2               1       1     2
## 28            1           2               1       1     2
## 29            1           2               1       1     2
## 30            1           1               1       1     2
## 31            1           2               1       1     2
## 32            1           3               1       1     2
## 33            5           7               4       3     4
## 34            1           3               1       1     2
## 35            1           2               1       1     2
## 36            1           2               1       1     2
## 37            1           8               9       1     4
## 38            1           7               1       1     2
## 39           10           5               6       1     4
## 40            7           7               5       1     4
## 41            ?           7               8       1     2
## 42            3           6               5       2     4
## 43           10           7               3       3     4
## 44            1           3               1       1     4
## 45            1           8              10       1     4
## 46            1           2               1       2     2
## 47            9           4               8       1     4
## 48            1           2               1       1     2
## 49            1           3               1       1     2
## 50            8           3               8       2     4
## 51            3           2               1       5     4
## 52            4           3               4       1     4
## 53            5           4              10       2     4
## 54            8           7               3       7     4
## 55            8           7               1       1     4
## 56            5           3               6       1     4
## 57            6           3               9       1     4
## 58            1           5               4       4     4
## 59           10           5               1       1     4
## 60            2           5               1       1     4
## 61            3           4              10       1     4
## 62            2           2               1       1     2
## 63            8           3               3       1     4
## 64            2           3               9       1     4
## 65            1           2               1       1     2
## 66            2           4               3      10     4
## 67            1           3               1       1     2
## 68           10           4               9       1     4
## 69            9           8               9       8     4
## 70            1           3               2       1     2
## 71            1           2               1       1     2
## 72            2           7               8      10     4
## 73            1           7               2       1     2
## 74           10           4               8       1     4
## 75            4           3               2       3     4
## 76            2           4               2       1     2
## 77            1           2               1       1     2
## 78            1           2               1       1     2
## 79            3           3               1       1     2
## 80            1           2               1       1     2
## 81            1           7               1       1     2
## 82            1           2               1       1     2
## 83            1           3               1       1     2
## 84            2           7               1       1     2
## 85            9           7              10       7     4
## 86            4           4              10      10     4
## 87            8           4               4       1     4
## 88           10           6               8       3     4
## 89            1           3               1       1     2
## 90            1           2               1       1     2
## 91            1           3               1       1     2
## 92            1           1               1       1     2
## 93            1           3               1       1     2
## 94            1           2               1       1     2
## 95            1           3               1       1     2
## 96            1           3               1       1     2
## 97            1           1               1       1     2
## 98            1           3               1       1     2
## 99            6           2               9      10     4
## 100          10           7               9       4     4
## 101           5           3              10       2     4
## 102           5           2               5       1     4
## 103           1           3               1       1     2
## 104           3           7               1       1     4
## 105           1           8               8       8     4
## 106           3           3               2       7     4
## 107          10           4               1       1     4
## 108          10           5               7       1     4
## 109           1           2               3       1     2
## 110           9           7               8       3     4
## 111           2           5               3       2     2
## 112           9           3               1       1     4
## 113          10           7               3       3     4
## 114           8           8               1       1     4
## 115           3           3               1       1     2
## 116           5           1               1       1     2
## 117           2           3               2       1     2
## 118          10           7               5       8     4
## 119           3           1               1       1     2
## 120           2           3               1       1     2
## 121           1           3               1       1     2
## 122           2           3               1       1     2
## 123          10           5               3       3     4
## 124          10           5               3       1     4
## 125           7           8              10       1     4
## 126           1           2               1       1     2
## 127          10           7               5       5     4
## 128           1           3               1       1     2
## 129          10           1               6       2     4
## 130           1           1               1       1     2
## 131           1           2               1       1     2
## 132           1           3               1       1     2
## 133          10           3               6       3     4
## 134           1           2               2       1     2
## 135           1           2               1       1     2
## 136           2           3               3       1     2
## 137           1           2               1       1     2
## 138           1           1               1       1     2
## 139           1           2               1       1     2
## 140           ?           2               1       1     2
## 141           1           1               1       1     2
## 142           1           1               1       1     2
## 143           5           4               3       3     4
## 144           5           1               1       1     2
## 145           1           2               1       1     2
## 146           ?           2               1       1     2
## 147           8           4               1       1     4
## 148           2           2               1       1     2
## 149           1           5               8       1     2
## 150          10           7               8       7     4
## 151           1           3               1       1     2
## 152          10           5               4       3     4
## 153           5           8              10       1     4
## 154           3           1               1       1     2
## 155           1           1               1       1     2
## 156          10           3               1       1     4
## 157           1           2               1       1     2
## 158           1           3               1       1     2
## 159           ?           1               1       1     2
## 160          10           7              10       6     4
## 161          10           5               7       2     4
## 162           1           3               2       1     2
## 163           1           3               1       1     2
## 164           3           1               1       7     2
## 165           ?           3               1       1     2
## 166           2           3               2       1     2
## 167          10           3              10       3     4
## 168           1           3               1      10     4
## 169           1           3               1       1     2
## 170           1           1               1       1     2
## 171           1           1               1       1     2
## 172           1           3               1       1     2
## 173           1           2               1       1     2
## 174          10          10              10       7     4
## 175          10           6               1       1     4
## 176          10           5               7       1     4
## 177           1           3               1       1     2
## 178           1           5              10       3     4
## 179           1           3               1       1     2
## 180          10           3               1       1     4
## 181           1           3               1       1     2
## 182           1           1               1       1     2
## 183           1           3               1       1     2
## 184          10           7               8       1     4
## 185          10           5               1       1     4
## 186           1           3               1       1     2
## 187           8           7              10       1     4
## 188          10           7               7      10     4
## 189           8           9              10       1     4
## 190           1           3               1       1     2
## 191           8           7              10       1     4
## 192          10           4              10       3     4
## 193           1           2               1       1     2
## 194           1           3               1       1     2
## 195           1           3               1       1     2
## 196           1           3               1       1     2
## 197           7           7               8       2     2
## 198           1           3               1       1     2
## 199           1           1               1       1     2
## 200           1           2               1       1     2
## 201          10           7               8       3     4
## 202          10           8               1       1     4
## 203           1           3               1       1     2
## 204           1           3               1       1     2
## 205           1           3               1       1     2
## 206          10           7              10       5     4
## 207           5           3               5       1     4
## 208           1           3               1       1     2
## 209           1           3               1       1     2
## 210           1           3               1       1     2
## 211          10           8              10       6     4
## 212           8           7               7       1     4
## 213           1           3               1       1     2
## 214          10           7              10       4     4
## 215          10          10               6       1     4
## 216           5           5              10       2     4
## 217           1           2               1       1     2
## 218           1           3               1       1     2
## 219           4           8              10       2     4
## 220           1           3               1       1     2
## 221           1           3               1       1     2
## 222          10           9              10       1     4
## 223           5           2               1       1     4
## 224           8           7               4       1     4
## 225          10           7               9       2     4
## 226           1           2               1       1     2
## 227          10           8               9       1     4
## 228           5           7               7       1     4
## 229           1           3               1       1     2
## 230          10           9              10       1     4
## 231           7           7               6       1     4
## 232           8           8               9       2     4
## 233           1           4               3       1     2
## 234          10           4               1       1     4
## 235           1           3               6       1     2
## 236           ?           3               1       1     2
## 237          10           4               8      10     4
## 238           2           4              10       4     4
## 239           9           3              10      10     4
## 240          10           5               3       2     4
## 241           2           2               3       1     2
## 242           1           3               1       1     2
## 243           1           3               1       1     2
## 244           5           5               1       1     2
## 245           1           3               1       1     2
## 246           2           3               1       1     2
## 247          10           7               8       1     4
## 248           9           3               3       1     4
## 249           1           3               6       1     2
## 250           ?           3               1       1     2
## 251           1           1               1       1     2
## 252          10           5               3       3     4
## 253          10           3               5       3     2
## 254          10           7               3       3     4
## 255           8           3               3       1     4
## 256          10           3               6       1     4
## 257           1           1               1       1     2
## 258           1           2               1       1     2
## 259           1           3               1       1     2
## 260           8           3               4       1     2
## 261          10           5               1       3     4
## 262          10          10               6       5     4
## 263          10           7               8       1     4
## 264          10           5               5       1     4
## 265           3           5               3       3     4
## 266           1           3               2       1     2
## 267          10           4               3       2     4
## 268          10           7               1       1     4
## 269           4           8               7       8     4
## 270           1           3               1       1     2
## 271          10           3               9       2     4
## 272           1           3               1       1     2
## 273          10           7               1       1     4
## 274           4           3               3       1     4
## 275           1           3               2       1     2
## 276           ?           2               1       1     2
## 277           1           2               1       1     2
## 278           1           2               1       1     2
## 279           1           3               1       1     2
## 280           7           3               3       8     4
## 281           1           3               1       1     2
## 282           1           3               1       1     2
## 283          10           5               6       1     4
## 284          10           5               3       1     4
## 285          10           3               8       2     4
## 286          10          10               7       3     4
## 287          10           4              10      10     4
## 288           1           2               1       1     2
## 289           5           5              10       1     4
## 290          10           4              10       4     4
## 291           1           1               1       1     2
## 292           1           3               1       1     2
## 293           ?           6              10       1     4
## 294          10           2               3       1     4
## 295           ?           2               1       1     2
## 296          10           7               4       1     4
## 297           5           4               7       1     2
## 298           ?           2               3       1     2
## 299           1           1               1       1     2
## 300          10           7               7       2     4
## 301           4           7              10       1     4
## 302           1           3               1       1     2
## 303          10           7              10      10     4
## 304           1           3               1       1     2
## 305          10           3               3       1     4
## 306          10           3              10       4     4
## 307           1           3               1       1     2
## 308           1           3               1       1     2
## 309           3           8               8       4     4
## 310           5           5               1       1     2
## 311           1           2               1       1     2
## 312           1           1               1       1     2
## 313           1           3               5       1     4
## 314           1           1               1       1     2
## 315           1           2               1       1     2
## 316           ?           4               9       1     2
## 317          10           4               3       1     4
## 318           8           8               9       1     4
## 319           1           3               1       1     2
## 320           5           7               3       1     2
## 321          10           7               4       6     4
## 322           ?           3               1       1     2
## 323           1           3               1       1     2
## 324          10           4               1       1     4
## 325           1           3               1       1     2
## 326           1           2               3       1     2
## 327          10           5               4       1     4
## 328           1           2               1       1     2
## 329           4           3              10       1     4
## 330          10           7               1       1     4
## 331           8           6               1       1     4
## 332           1           3               1       2     2
## 333           1           2               2       1     2
## 334          10           4               3       1     4
## 335          10           3               4       2     4
## 336           1           1               1       1     2
## 337          10           3               4       1     4
## 338           1           3               1       1     2
## 339           1           2               1       1     2
## 340          10           4               3       1     4
## 341          10           7               6       1     4
## 342           1           3               1       1     2
## 343           1           1               1       1     2
## 344           1           1               1       1     2
## 345          10           9               5       3     4
## 346           1           1               1       1     2
## 347           1           1               3       1     2
## 348           1           1               3       1     2
## 349           1           3               3       1     4
## 350           8           7               6       1     4
## 351           1           1               1       1     2
## 352           1           3               1       1     2
## 353           3           4               6       1     2
## 354          10           4               9       4     4
## 355           1           2               1       1     2
## 356           1           2               2       1     2
## 357           3           3               3       3     4
## 358          10           7               3       8     4
## 359           4           4              10       3     4
## 360           7           3               5       3     4
## 361          10           8              10      10     4
## 362          10           5               1       4     4
## 363           3           2               1       1     2
## 364           3           2               1       1     2
## 365           1           3               1       1     2
## 366           1           2               1       1     2
## 367          10           7              10       7     4
## 368          10           8              10       3     4
## 369           1           1               1       1     2
## 370           1           2               1       1     2
## 371           1           2               1       1     2
## 372           1           1               1       1     2
## 373           1           2               1       1     2
## 374           1           2               1       1     2
## 375           1           2               1       1     2
## 376           1           1               1       1     2
## 377           1           2               1       1     2
## 378           1           2               1       1     2
## 379           1           2               2       1     2
## 380           1           3               1       1     2
## 381           1           1               1       1     2
## 382          10           7               8       4     4
## 383           1           3               2       1     2
## 384           1           1               1       1     2
## 385           1           1               1       1     2
## 386           1           1               2       3     2
## 387          10           7               1       1     4
## 388           1           3               1       1     2
## 389           1           2               2       1     2
## 390           2           2               2       1     2
## 391           1           2               1       1     2
## 392          10           7               9       1     4
## 393           1           2               1       1     2
## 394           1           1               1       1     2
## 395           1           2               1       1     2
## 396           1           2               1       1     2
## 397           1           3               1       1     2
## 398           1           1               1       1     2
## 399           1           2               2       1     2
## 400           1           1               1       1     2
## 401           9           9               3       8     4
## 402           1           1               1       1     2
## 403           1           2               1       1     2
## 404           4           1               1       1     2
## 405           1           1               2       1     2
## 406           1           2               1       1     2
## 407           1           2               1       1     2
## 408           1           2               1       1     2
## 409           2           3               1       1     2
## 410           1           2               1       1     2
## 411           1           2               1       1     2
## 412           ?           2               1       1     2
## 413           4           8               5       1     4
## 414           1           3               1       1     2
## 415          10           6               6       1     4
## 416           3           3               5       1     2
## 417          10           7               2       1     4
## 418           1           2               1       1     2
## 419           2           3               2       2     2
## 420           1           1               1       1     2
## 421           3           3               1       1     2
## 422          10           8               2       1     4
## 423           1           3               3       1     2
## 424           1           2               1       1     2
## 425           1           1               1       1     2
## 426          10          10              10       1     4
## 427           1           1               1       1     2
## 428           2           5              10       1     4
## 429           1           2               1       1     2
## 430           1           2               1       1     2
## 431           1           2               2       1     2
## 432           1           3               2       1     2
## 433           1           2               2       1     2
## 434           1           1               1       1     2
## 435           8           4               2       1     2
## 436          10           5               1       1     4
## 437           1           2               8       1     4
## 438           1           1               1       1     2
## 439           1           1               1       1     2
## 440           1           1               1       1     2
## 441          10          10               1       1     4
## 442           4           1               1       1     2
## 443           3           1               1       1     2
## 444           2           1               1       1     2
## 445           1           2               1       1     2
## 446           1           1               1       1     2
## 447           1           1               1       1     2
## 448           1           1               1       1     2
## 449           1           1               1       1     2
## 450          10           8              10       1     4
## 451           1           2               1       1     2
## 452           1           1               1       1     2
## 453           1           1               1       1     2
## 454          10          10               7       1     4
## 455           1           1               1       1     2
## 456           6           1               1       2     4
## 457          10           8               6       1     4
## 458           3          10              10       1     4
## 459           1           1               1       1     2
## 460           1           1               1       1     2
## 461           1           1               1       1     2
## 462           5           1               1       1     2
## 463           1           1               1       1     2
## 464           1           1               2       1     2
## 465           1           1               1       1     2
## 466           4           7              10       3     4
## 467          10           9               7       1     4
## 468          10           7               6       2     4
## 469           1           1               1       1     2
## 470           1           2               1       1     2
## 471           1           2               1       1     2
## 472           1           1               1       1     2
## 473           1           1               1       1     2
## 474           1           1               1       1     2
## 475           1           1               1       1     2
## 476           1           1               1       1     2
## 477           1           1               1       1     2
## 478           1           1               1       1     2
## 479           1           1               1       1     2
## 480          10           7               5       1     4
## 481           1           1               1       1     2
## 482           1           1               1       1     2
## 483           5          10              10      10     4
## 484          10           9              10       1     4
## 485           1           1               1       1     2
## 486           3           1               1       1     2
## 487           1           2               1       1     2
## 488          10           8               1       5     4
## 489           3           3               4       1     4
## 490           4           4               1       1     4
## 491           1           1               1       1     2
## 492          10           7               1       1     4
## 493           1           2               1       1     2
## 494          10           6               5       2     4
## 495           5           2               1       1     2
## 496           1           2               1       1     2
## 497           1           1               1       1     2
## 498           1           1               1       1     2
## 499           1           2               1       1     2
## 500           1           2               1       1     2
## 501           1           3               1       1     2
## 502           1           2               1       1     2
## 503           1           2               1       1     2
## 504           1           3               1       1     2
## 505           1           1               1       1     2
## 506           1           1               1       1     2
## 507           5           4               8       7     4
## 508           4           1               1       1     2
## 509           1           1               1       1     2
## 510           1           1               1       1     2
## 511           1           1               1       1     2
## 512           1           2               1       1     2
## 513           1           1               1       1     2
## 514           1           2               1       1     2
## 515          10           8              10       2     4
## 516          10           9              10       1     4
## 517           1           1               1       1     2
## 518           1           2               1       1     2
## 519           1           1               1       1     2
## 520          10           9               1       1     4
## 521           1           1               1       1     2
## 522           1           1               1       1     2
## 523           5           7               3       1     4
## 524          10           5               3       1     4
## 525           1           2               1       1     2
## 526           1           1               1       1     2
## 527           1           1               1       1     2
## 528           1           3               1       1     2
## 529           1           1               1       1     2
## 530           1           2               1       1     2
## 531          10           6               9       1     4
## 532           1           2               1       1     2
## 533           1           3               1       1     2
## 534           1           2               1       1     2
## 535           1           2               1       1     2
## 536           1           3               1       1     2
## 537           1           3               1       1     2
## 538           1           3               1       1     2
## 539           1           2               1       1     2
## 540           1           2               1       1     2
## 541           2           2               1       1     2
## 542           1           1               1       1     2
## 543           1           1               1       1     2
## 544           1           2               1       1     2
## 545           1           2               1       1     2
## 546           1           2               1       1     2
## 547          10           7              10       1     4
## 548           1           1               1       1     2
## 549           1           1               1       1     2
## 550           5           7               8       2     4
## 551           1           2               1       1     2
## 552           1           3               1       1     2
## 553           1           4               2       1     2
## 554           5           2               1       2     2
## 555           1           1               1       1     2
## 556           1           4               8       1     2
## 557           1           2               1       1     2
## 558           1           1               1       1     2
## 559           1           2               1       1     2
## 560           1           2               1       1     2
## 561           1           3               1       1     2
## 562           1           3               1       1     2
## 563           1           3               1       1     2
## 564           1           2               1       1     2
## 565           1           3               2       1     2
## 566          10          10              10       1     4
## 567           1           3               1       1     2
## 568           3           2               1       1     2
## 569          10           2               5       2     4
## 570           5          10               3       1     4
## 571          10           8               2       1     4
## 572          10           9              10       2     4
## 573           1           2               1       1     2
## 574           1           2               1       1     2
## 575           2           7               7       1     4
## 576           1           3               1       1     2
## 577           1           2               1       1     2
## 578           1           2               1       1     2
## 579           1           2               1       1     2
## 580           1           3               1       1     2
## 581           1           2               1       1     2
## 582          10           7               5       1     4
## 583          10           6              10       1     4
## 584           1           1               1       1     2
## 585           1           1               1       1     2
## 586           1           1               1       1     2
## 587          10          10              10       1     4
## 588           1           2               2       1     2
## 589           3           4               1       1     4
## 590           1           1               1       1     2
## 591           1          10               1       1     4
## 592          10           7               6       1     4
## 593          10           4               1       1     4
## 594           1           1               1       1     2
## 595          10           7               1       1     4
## 596           1           2               1       1     2
## 597           1           2               1       1     2
## 598           1           3               1       1     2
## 599           1           2               1       1     2
## 600           1           1               1       1     2
## 601           1           2               1       1     2
## 602           1           2               1       1     2
## 603           1           2               1       1     2
## 604           1           8              10       1     4
## 605          10           8               1       2     4
## 606           8           7               8       3     4
## 607           1           1               1       1     2
## 608           1           1               1       1     2
## 609          10          10               1       1     4
## 610           1           1               1       1     2
## 611          10           7               1       2     4
## 612           2           8               5       1     4
## 613          10          10              10      10     4
## 614           1           2               1       1     2
## 615           1           2               1       1     2
## 616           1           2               1       1     2
## 617           1           2               1       1     2
## 618           ?           1               1       1     2
## 619           1           2               1       1     2
## 620           1           2               1       1     2
## 621           1           2               1       1     2
## 622           2           6               1       1     2
## 623           1           2               1       1     2
## 624           1           1               1       1     2
## 625           1           2               1       1     2
## 626           4           1               1       1     2
## 627           6           7               7       3     4
## 628           5           1               1       1     2
## 629           1           1               1       1     2
## 630           1           1               1       1     2
## 631           1           1               1       1     2
## 632           1           2               1       1     2
## 633           1           1               1       1     2
## 634           3           5              10       1     4
## 635           1           1               1       1     2
## 636           1           1               1       1     2
## 637           1          10              10       3     4
## 638           2           2               1       1     2
## 639           1           1               1       1     2
## 640           1           1               1       1     2
## 641           1           1               1       1     2
## 642           1           2               1       1     2
## 643           1           2               1       1     2
## 644           1           1               1       1     2
## 645           1           1               1       1     2
## 646           1           2               1       1     2
## 647           1           1               1       1     2
## 648           1           1               1       1     2
## 649           2          10              10      10     4
## 650           1           2               1       1     2
## 651           4           1               1       1     2
## 652           1           2               1       1     2
## 653           1           2               2       1     2
## 654           1           2               1       1     2
## 655           1           3               1       1     2
## 656           1           2               1       1     2
## 657           1           2               1       1     2
## 658           1           3               6       1     2
## 659          10           7               2       3     4
## 660           1           1               1       1     2
## 661           1           2               1       1     2
## 662           1           3               1       1     2
## 663           1           2               1       1     2
## 664           1           2               1       1     2
## 665           1           2               1       1     2
## 666           1           1               1       1     2
## 667           1           1               1       2     2
## 668           1           3               1       1     2
## 669           1           7              10       3     4
## 670           5           7              10       1     4
## 671           8           7               4       1     4
## 672           1           3               1       1     2
## 673           1           3               1       1     2
## 674           1           1               1       1     2
## 675           1           2               1       1     2
## 676           1           1               1       1     2
## 677           1           2               1       1     2
## 678           1           1               1       1     2
## 679           1           1               1       1     2
## 680           1           1               1       1     2
## 681          10          10              10       7     4
## 682          10           5               6       3     4
## 683           1           3               2       1     2
## 684           1           1               1       1     2
## 685           1           1               1       1     2
## 686           1           1               1       1     2
## 687           1           1               1       1     2
## 688           1           2               3       1     2
## 689           1           1               1       1     2
## 690           1           1               1       8     2
## 691           1           1               1       1     2
## 692           5           4               4       1     4
## 693           1           1               1       1     2
## 694           1           2               1       2     2
## 695           2           1               1       1     2
## 696           1           1               1       1     2
## 697           3           8              10       2     4
## 698           4          10               6       1     4
## 699           5          10               4       1     4
raw_breast_cancer$Class %>%
 { dict <- c("2" = "benign", "4" = "malignant")
 dict[as.character(.)]
 } %>%
 unname %>%
 factor(levels = c("benign", "malignant")) %>%
 table
## .
##    benign malignant 
##       458       241
### Saving a file
formatted_breast_cancer %>%
save(file = "C:/Users/HP/Desktop/Data Science/Learning R Language/formatted-breast-cancer.rda")

### loading a saved file  
load("C:/Users/HP/Desktop/Data Science/Learning R Language/formatted-breast-cancer.rda") 
#data(DNA)
#str(DNA)

### Boston Housing Datasets
data(BostonHousing)
str(BostonHousing)
## 'data.frame':    506 obs. of  14 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : num  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ b      : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
boston <- "http://tinyurl.com/zq2u8vx"
#data(BostonHousing2)
#str(BostonHousing2)
#boston_housing <- read.table(boston)
#str(boston_housing)

col_classes <- rep("numeric", length(BostonHousing))
col_classes[which("chas" == names(BostonHousing))] <- "factor"

#boston_housing <- read.table(boston,col.names = names(BostonHousing),colClasses = col_classes)
#str(boston_housing)
### Readr Package
library(readr)

raw_breast_cancer <- read_csv("C:/Users/HP/Desktop/Data Science/Machine learning/breast+cancer+wisconsin+original/breast-cancer-wisconsin.data",show_col_types = "TRUE", col_names = names(BreastCancer))
raw_breast_cancer %>% head()
## # A tibble: 6 × 11
##        Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
##     <dbl>        <dbl>     <dbl>      <dbl>         <dbl>        <dbl>
## 1 1000025            5         1          1             1            2
## 2 1002945            5         4          4             5            7
## 3 1015425            3         1          1             1            2
## 4 1016277            6         8          8             1            3
## 5 1017023            4         1          1             3            2
## 6 1017122            8        10         10             8            7
## # ℹ 5 more variables: Bare.nuclei <chr>, Bl.cromatin <dbl>,
## #   Normal.nucleoli <dbl>, Mitoses <dbl>, Class <dbl>
raw_breast_cancer <- read_csv("C:/Users/HP/Desktop/Data Science/Machine learning/breast+cancer+wisconsin+original/breast-cancer-wisconsin.data",
 col_names = names(BreastCancer), show_col_types = "TRUE")
raw_breast_cancer %>% head()
## # A tibble: 6 × 11
##        Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
##     <dbl>        <dbl>     <dbl>      <dbl>         <dbl>        <dbl>
## 1 1000025            5         1          1             1            2
## 2 1002945            5         4          4             5            7
## 3 1015425            3         1          1             1            2
## 4 1016277            6         8          8             1            3
## 5 1017023            4         1          1             3            2
## 6 1017122            8        10         10             8            7
## # ℹ 5 more variables: Bare.nuclei <chr>, Bl.cromatin <dbl>,
## #   Normal.nucleoli <dbl>, Mitoses <dbl>, Class <dbl>
### Manipulating Data with dplyr
library(tidyverse)

iris %>% tibble::as_tibble()
## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ℹ 140 more rows
iris %>% tbl_df %>% select(Species,Petal.Width) %>% head()
## # A tibble: 6 × 2
##   Species Petal.Width
##   <fct>         <dbl>
## 1 setosa          0.2
## 2 setosa          0.2
## 3 setosa          0.2
## 4 setosa          0.2
## 5 setosa          0.2
## 6 setosa          0.4
iris %>% tbl_df %>% select(Species,Sepal.Width) %>% head()
## # A tibble: 6 × 2
##   Species Sepal.Width
##   <fct>         <dbl>
## 1 setosa          3.5
## 2 setosa          3  
## 3 setosa          3.2
## 4 setosa          3.1
## 5 setosa          3.6
## 6 setosa          3.9
iris %>% tbl_df %>% select(Species,Petal.Length) %>% head()
## # A tibble: 6 × 2
##   Species Petal.Length
##   <fct>          <dbl>
## 1 setosa           1.4
## 2 setosa           1.4
## 3 setosa           1.3
## 4 setosa           1.5
## 5 setosa           1.4
## 6 setosa           1.7
iris %>% tbl_df %>% select(Species,Sepal.Length) %>% head()
## # A tibble: 6 × 2
##   Species Sepal.Length
##   <fct>          <dbl>
## 1 setosa           5.1
## 2 setosa           4.9
## 3 setosa           4.7
## 4 setosa           4.6
## 5 setosa           5  
## 6 setosa           5.4
iris %>% tbl_df %>%
select(Sepal.Width, Petal.Width) %>% head() ## Select from Sepal.Width and Petal.Width
## # A tibble: 6 × 2
##   Sepal.Width Petal.Width
##         <dbl>       <dbl>
## 1         3.5         0.2
## 2         3           0.2
## 3         3.2         0.2
## 4         3.1         0.2
## 5         3.6         0.2
## 6         3.9         0.4
iris %>% tbl_df %>% select(Sepal.Length:Petal.Length) %>% head() ## Select from Sepal.Width to Petal.length
## # A tibble: 6 × 3
##   Sepal.Length Sepal.Width Petal.Length
##          <dbl>       <dbl>        <dbl>
## 1          5.1         3.5          1.4
## 2          4.9         3            1.4
## 3          4.7         3.2          1.3
## 4          4.6         3.1          1.5
## 5          5           3.6          1.4
## 6          5.4         3.9          1.7
iris %>% tbl_df %>% select(starts_with("Petal")) %>% head() ## Select Petal.Length and Width only
## # A tibble: 6 × 2
##   Petal.Length Petal.Width
##          <dbl>       <dbl>
## 1          1.4         0.2
## 2          1.4         0.2
## 3          1.3         0.2
## 4          1.5         0.2
## 5          1.4         0.2
## 6          1.7         0.4
iris %>% tbl_df %>%
select(ends_with("Length")) %>% head()
## # A tibble: 6 × 2
##   Sepal.Length Petal.Length
##          <dbl>        <dbl>
## 1          5.1          1.4
## 2          4.9          1.4
## 3          4.7          1.3
## 4          4.6          1.5
## 5          5            1.4
## 6          5.4          1.7
iris %>% tbl_df %>%
select(contains("etal")) %>% head()
## # A tibble: 6 × 2
##   Petal.Length Petal.Width
##          <dbl>       <dbl>
## 1          1.4         0.2
## 2          1.4         0.2
## 3          1.3         0.2
## 4          1.5         0.2
## 5          1.4         0.2
## 6          1.7         0.4
iris %>% tbl_df %>% select(contains("al")) %>% head()
## # A tibble: 6 × 4
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
##          <dbl>       <dbl>        <dbl>       <dbl>
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3            1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5           3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4
iris %>% tbl_df %>%
select(matches(".t.")) %>% head()
## # A tibble: 6 × 4
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
##          <dbl>       <dbl>        <dbl>       <dbl>
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3            1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5           3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4
iris %>% tbl_df %>%
 select(-starts_with("Petal")) %>% head() ### inverse selection
## # A tibble: 6 × 3
##   Sepal.Length Sepal.Width Species
##          <dbl>       <dbl> <fct>  
## 1          5.1         3.5 setosa 
## 2          4.9         3   setosa 
## 3          4.7         3.2 setosa 
## 4          4.6         3.1 setosa 
## 5          5           3.6 setosa 
## 6          5.4         3.9 setosa
iris %>% tbl_df %>%
 mutate(Petal.Width.plus.Length = Petal.Width + Petal.Length) %>%select(Species, Petal.Width.plus.Length) %>%head()
## # A tibble: 6 × 2
##   Species Petal.Width.plus.Length
##   <fct>                     <dbl>
## 1 setosa                      1.6
## 2 setosa                      1.6
## 3 setosa                      1.5
## 4 setosa                      1.7
## 5 setosa                      1.6
## 6 setosa                      2.1
iris %>% tbl_df %>% mutate(Sepal.Width.plus.Length = Sepal.Width + Sepal.Length) %>%select(Species, Sepal.Width.plus.Length) %>%head()
## # A tibble: 6 × 2
##   Species Sepal.Width.plus.Length
##   <fct>                     <dbl>
## 1 setosa                      8.6
## 2 setosa                      7.9
## 3 setosa                      7.9
## 4 setosa                      7.7
## 5 setosa                      8.6
## 6 setosa                      9.3
iris %>% tbl_df %>%mutate(Petal.Width.plus.Length = Petal.Width + Petal.Length,Sepal.Width.plus.Length = Sepal.Width + Sepal.Length) %>%select(Species,Petal.Width.plus.Length, Sepal.Width.plus.Length) %>% head()
## # A tibble: 6 × 3
##   Species Petal.Width.plus.Length Sepal.Width.plus.Length
##   <fct>                     <dbl>                   <dbl>
## 1 setosa                      1.6                     8.6
## 2 setosa                      1.6                     7.9
## 3 setosa                      1.5                     7.9
## 4 setosa                      1.7                     7.7
## 5 setosa                      1.6                     8.6
## 6 setosa                      2.1                     9.3
iris %>% tbl_df %>%transmute(Species ,Petal.Width.plus.Length = Petal.Width + Petal.Length) %>%
 head()
## # A tibble: 6 × 2
##   Species Petal.Width.plus.Length
##   <fct>                     <dbl>
## 1 setosa                      1.6
## 2 setosa                      1.6
## 3 setosa                      1.5
## 4 setosa                      1.7
## 5 setosa                      1.6
## 6 setosa                      2.1
iris %>% tbl_df %>% arrange(Sepal.Length) %>% head() 
## # A tibble: 6 × 5
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
## 1          4.3         3            1.1         0.1 setosa 
## 2          4.4         2.9          1.4         0.2 setosa 
## 3          4.4         3            1.3         0.2 setosa 
## 4          4.4         3.2          1.3         0.2 setosa 
## 5          4.5         2.3          1.3         0.3 setosa 
## 6          4.6         3.1          1.5         0.2 setosa
#from smallest to largest in Sepal.Length column
iris %>% tbl_df %>% arrange(Petal.Width) %>% head()
## # A tibble: 6 × 5
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
## 1          4.9         3.1          1.5         0.1 setosa 
## 2          4.8         3            1.4         0.1 setosa 
## 3          4.3         3            1.1         0.1 setosa 
## 4          5.2         4.1          1.5         0.1 setosa 
## 5          4.9         3.6          1.4         0.1 setosa 
## 6          5.1         3.5          1.4         0.2 setosa
#from smallest to largest in Sepal.Length column
iris %>% tbl_df %>%
 arrange(desc(Sepal.Length)) %>%
 head() ### from largest to smallest in Sepal.Length column
## # A tibble: 6 × 5
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species  
##          <dbl>       <dbl>        <dbl>       <dbl> <fct>    
## 1          7.9         3.8          6.4         2   virginica
## 2          7.7         3.8          6.7         2.2 virginica
## 3          7.7         2.6          6.9         2.3 virginica
## 4          7.7         2.8          6.7         2   virginica
## 5          7.7         3            6.1         2.3 virginica
## 6          7.6         3            6.6         2.1 virginica
iris %>% tbl_df %>%
 filter(Sepal.Length >= 5) %>%
 head()
## # A tibble: 6 × 5
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
## 1          5.1         3.5          1.4         0.2 setosa 
## 2          5           3.6          1.4         0.2 setosa 
## 3          5.4         3.9          1.7         0.4 setosa 
## 4          5           3.4          1.5         0.2 setosa 
## 5          5.4         3.7          1.5         0.2 setosa 
## 6          5.8         4            1.2         0.2 setosa
iris %>% tbl_df %>%
 filter(Petal.Length <= 1.4) %>%
 head()
## # A tibble: 6 × 5
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
## 1          5.1         3.5          1.4         0.2 setosa 
## 2          4.9         3            1.4         0.2 setosa 
## 3          4.7         3.2          1.3         0.2 setosa 
## 4          5           3.6          1.4         0.2 setosa 
## 5          4.6         3.4          1.4         0.3 setosa 
## 6          4.4         2.9          1.4         0.2 setosa
iris %>% tbl_df %>%filter(Sepal.Length > 5 & Species == "virginica") %>%select(Species, Sepal.Length) %>%head() 
## # A tibble: 6 × 2
##   Species   Sepal.Length
##   <fct>            <dbl>
## 1 virginica          6.3
## 2 virginica          5.8
## 3 virginica          7.1
## 4 virginica          6.3
## 5 virginica          6.5
## 6 virginica          7.6
iris %>% tbl_df %>% group_by(Species) %>% head()
## # A tibble: 6 × 5
## # Groups:   Species [1]
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
## 1          5.1         3.5          1.4         0.2 setosa 
## 2          4.9         3            1.4         0.2 setosa 
## 3          4.7         3.2          1.3         0.2 setosa 
## 4          4.6         3.1          1.5         0.2 setosa 
## 5          5           3.6          1.4         0.2 setosa 
## 6          5.4         3.9          1.7         0.4 setosa
iris %>% summarise(Mean.Petal.Length = mean(Petal.Length),
 Mean.Sepal.Length = mean(Sepal.Length))
##   Mean.Petal.Length Mean.Sepal.Length
## 1             3.758          5.843333
iris %>% summarise(Std.Petal.Length = sd(Petal.Length), Std.Sepal.Length = sd(Sepal.Length))
##   Std.Petal.Length Std.Sepal.Length
## 1         1.765298        0.8280661
iris %>% summarise(Mean.Petal.Width = mean(Petal.Width),
 Mean.Sepal.Width = mean(Sepal.Width))
##   Mean.Petal.Width Mean.Sepal.Width
## 1         1.199333         3.057333
iris %>% summarise(Var.Petal.Width = var(Petal.Width), Var.Seapl.Width= var(Sepal.Width)) 
##   Var.Petal.Width Var.Seapl.Width
## 1       0.5810063       0.1899794
iris %>%group_by(Species) %>%summarise(Mean.Petal.Length = mean(Petal.Length)) 
## # A tibble: 3 × 2
##   Species    Mean.Petal.Length
##   <fct>                  <dbl>
## 1 setosa                  1.46
## 2 versicolor              4.26
## 3 virginica               5.55
iris %>% group_by(Species) %>% summarise(Var.Petal.Length = var(Petal.Length))
## # A tibble: 3 × 2
##   Species    Var.Petal.Length
##   <fct>                 <dbl>
## 1 setosa               0.0302
## 2 versicolor           0.221 
## 3 virginica            0.305
iris %>% group_by(Species) %>% summarise(Mean.Sepal.Length = mean(Sepal.Length)) 
## # A tibble: 3 × 2
##   Species    Mean.Sepal.Length
##   <fct>                  <dbl>
## 1 setosa                  5.01
## 2 versicolor              5.94
## 3 virginica               6.59
iris %>% group_by(Species)%>%
 summarize(Median.Petal.Width = median(Petal.Width))
## # A tibble: 3 × 2
##   Species    Median.Petal.Width
##   <fct>                   <dbl>
## 1 setosa                    0.2
## 2 versicolor                1.3
## 3 virginica                 2
iris %>% group_by(Species)%>% summarize(Median.Sepal.Width = median(Sepal.Width))
## # A tibble: 3 × 2
##   Species    Median.Sepal.Width
##   <fct>                   <dbl>
## 1 setosa                    3.4
## 2 versicolor                2.8
## 3 virginica                 3
iris %>% summarise(Observations = n())
##   Observations
## 1          150
iris %>%
 group_by(Species) %>%
 summarise(Number.Of.Species = n())
## # A tibble: 3 × 2
##   Species    Number.Of.Species
##   <fct>                  <int>
## 1 setosa                    50
## 2 versicolor                50
## 3 virginica                 50
iris %>%group_by(Species) %>%summarise(Number.Of.Samples = n(), Mean.Petal.Length = mean(Petal.Length))
## # A tibble: 3 × 3
##   Species    Number.Of.Samples Mean.Petal.Length
##   <fct>                  <int>             <dbl>
## 1 setosa                    50              1.46
## 2 versicolor                50              4.26
## 3 virginica                 50              5.55
iris %>%
 group_by(Species) %>%
 summarise(Number.Of.Samples = n(),
 Mean.Sepal.Length = mean(Sepal.Length))
## # A tibble: 3 × 3
##   Species    Number.Of.Samples Mean.Sepal.Length
##   <fct>                  <int>             <dbl>
## 1 setosa                    50              5.01
## 2 versicolor                50              5.94
## 3 virginica                 50              6.59
format_class <- . %>% {
 dict <- c("2" = "benign", "4" = "malignant")
 dict[as.character(.)]
} %>% unname %>% factor(levels = c("benign", "malignant"))

formatted_breast_cancer <-
 raw_breast_cancer %>% mutate(Class = format_class(Class)) 
 
formatted_breast_cancer %>%
 group_by(Class) %>%
 summarise(mean.Cl.thickness = mean(Cl.thickness)) 
## # A tibble: 2 × 2
##   Class     mean.Cl.thickness
##   <fct>                 <dbl>
## 1 benign                 2.96
## 2 malignant              7.20
formatted_breast_cancer %>%
 group_by(Class) %>%
 summarise(mean.Cell.size = mean(Cell.size))
## # A tibble: 2 × 2
##   Class     mean.Cell.size
##   <fct>              <dbl>
## 1 benign              1.33
## 2 malignant           6.57
formatted_breast_cancer %>%
 arrange(Cell.size) %>%
 group_by(Cell.size, Class) %>%
 summarise(ClassCount = n()) 
## # A tibble: 18 × 3
## # Groups:   Cell.size [10]
##    Cell.size Class     ClassCount
##        <dbl> <fct>          <int>
##  1         1 benign           380
##  2         1 malignant          4
##  3         2 benign            37
##  4         2 malignant          8
##  5         3 benign            27
##  6         3 malignant         25
##  7         4 benign             9
##  8         4 malignant         31
##  9         5 malignant         30
## 10         6 benign             2
## 11         6 malignant         25
## 12         7 benign             1
## 13         7 malignant         18
## 14         8 benign             1
## 15         8 malignant         28
## 16         9 benign             1
## 17         9 malignant          5
## 18        10 malignant         67
formatted_breast_cancer %>%
 group_by(Class, as.factor(Cell.size)) %>%
 summarise(mean.Cl.thickness = mean(Cl.thickness))
## # A tibble: 18 × 3
## # Groups:   Class [2]
##    Class     `as.factor(Cell.size)` mean.Cl.thickness
##    <fct>     <fct>                              <dbl>
##  1 benign    1                                   2.76
##  2 benign    2                                   3.49
##  3 benign    3                                   3.81
##  4 benign    4                                   5.11
##  5 benign    6                                   5   
##  6 benign    7                                   5   
##  7 benign    8                                   6   
##  8 benign    9                                   6   
##  9 malignant 1                                   7.25
## 10 malignant 2                                   6.75
## 11 malignant 3                                   6.44
## 12 malignant 4                                   7.71
## 13 malignant 5                                   6.87
## 14 malignant 6                                   6.88
## 15 malignant 7                                   6.89
## 16 malignant 8                                   7.18
## 17 malignant 9                                   8.8 
## 18 malignant 10                                  7.52
#formatted_breast_cancer %>%group_by(as.factor(Cell.size), Class) %>% summarise(mean.thickness = mean(Cl.thickness)) 
iris %>% select(Species, Petal.Length) %>% head() 
##   Species Petal.Length
## 1  setosa          1.4
## 2  setosa          1.4
## 3  setosa          1.3
## 4  setosa          1.5
## 5  setosa          1.4
## 6  setosa          1.7
iris %>% select(Species, Petal.Length) %>%
 qplot(Species, Petal.Length, geom = "boxplot", data = .) 

#library(tidyr) 
iris %>%gather(key = Attribute, value = Measurement,Sepal.Length, Sepal.Width) %>%select(Species, Attribute, Measurement) %>% head() 
##   Species    Attribute Measurement
## 1  setosa Sepal.Length         5.1
## 2  setosa Sepal.Length         4.9
## 3  setosa Sepal.Length         4.7
## 4  setosa Sepal.Length         4.6
## 5  setosa Sepal.Length         5.0
## 6  setosa Sepal.Length         5.4
iris %>%
 gather(key = Attribute, value = Measurement,
 Sepal.Length, Sepal.Width) %>% select(Species, Attribute, Measurement) %>% qplot(Attribute, Measurement, geom = "boxplot", facets = . ~ Species, data = .)

iris %>%
 gather(key = Attribute, value = Measurement,
 Petal.Length, Petal.Width) %>%
 select(Species, Attribute, Measurement) %>%
 head() 
##   Species    Attribute Measurement
## 1  setosa Petal.Length         1.4
## 2  setosa Petal.Length         1.4
## 3  setosa Petal.Length         1.3
## 4  setosa Petal.Length         1.5
## 5  setosa Petal.Length         1.4
## 6  setosa Petal.Length         1.7
iris %>%
 gather(key = Attribute, value = Measurement,
 Petal.Length, Petal.Width) %>%
 select(Species, Attribute, Measurement) %>%
 qplot(Attribute, Measurement,
 geom = "boxplot",
 facets = . ~ Species, data = .)

## **Chapter 4** 
### Visualizing Data
library(graphics)
x = rnorm(100)
y = rnorm(100)
plot(x,y, type="p", main ="X against Y", col = "red")

data(cars)
cars %$% plot(speed, dist, main = "Cars data",
 xlab = "Speed", ylab = "Stopping distance")

cars %>% plot(dist ~ speed, data = ., main ="Cars data", col = "red")

cars %$% plot(speed, dist, main = "Cars data", type = "h",
 xlab = "Speed", ylab = "Stopping Distance")

cars %$% plot(speed, dist, main = "Cars data", type = "s",
 xlab = "Speed", ylab = "Stopping Distance")

cars %$% hist(speed, col = "orange")

cars %$% hist(dist, col= "blue")

cars %>% lm(dist ~ speed, data = .) %>% plot ### Plotting Linear Models

data = longley
data
##      GNP.deflator     GNP Unemployed Armed.Forces Population Year Employed
## 1947         83.0 234.289      235.6        159.0    107.608 1947   60.323
## 1948         88.5 259.426      232.5        145.6    108.632 1948   61.122
## 1949         88.2 258.054      368.2        161.6    109.773 1949   60.171
## 1950         89.5 284.599      335.1        165.0    110.929 1950   61.187
## 1951         96.2 328.975      209.9        309.9    112.075 1951   63.221
## 1952         98.1 346.999      193.2        359.4    113.270 1952   63.639
## 1953         99.0 365.385      187.0        354.7    115.094 1953   64.989
## 1954        100.0 363.112      357.8        335.0    116.219 1954   63.761
## 1955        101.2 397.469      290.4        304.8    117.388 1955   66.019
## 1956        104.6 419.180      282.2        285.7    118.734 1956   67.857
## 1957        108.4 442.769      293.6        279.8    120.445 1957   68.169
## 1958        110.8 444.546      468.1        263.7    121.950 1958   66.513
## 1959        112.6 482.704      381.3        255.2    123.366 1959   68.655
## 1960        114.2 502.601      393.1        251.4    125.368 1960   69.564
## 1961        115.7 518.173      480.6        257.2    127.852 1961   69.331
## 1962        116.9 554.894      400.7        282.7    130.081 1962   70.551
longley %>% plot(Unemployed ~ Year, data = ., type = 'l')
longley %>% lines(Armed.Forces ~ Year, data = ., col = "blue")

longley %$% plot(Unemployed ~ Year, type = 'l',
 ylim = range(c(Unemployed, Armed.Forces)))
longley %>% lines(Armed.Forces ~ Year, data = ., col = "green")

as_tibble(longley)
## # A tibble: 16 × 7
##    GNP.deflator   GNP Unemployed Armed.Forces Population  Year Employed
##           <dbl> <dbl>      <dbl>        <dbl>      <dbl> <int>    <dbl>
##  1         83    234.       236.         159        108.  1947     60.3
##  2         88.5  259.       232.         146.       109.  1948     61.1
##  3         88.2  258.       368.         162.       110.  1949     60.2
##  4         89.5  285.       335.         165        111.  1950     61.2
##  5         96.2  329.       210.         310.       112.  1951     63.2
##  6         98.1  347.       193.         359.       113.  1952     63.6
##  7         99    365.       187          355.       115.  1953     65.0
##  8        100    363.       358.         335        116.  1954     63.8
##  9        101.   397.       290.         305.       117.  1955     66.0
## 10        105.   419.       282.         286.       119.  1956     67.9
## 11        108.   443.       294.         280.       120.  1957     68.2
## 12        111.   445.       468.         264.       122.  1958     66.5
## 13        113.   483.       381.         255.       123.  1959     68.7
## 14        114.   503.       393.         251.       125.  1960     69.6
## 15        116.   518.       481.         257.       128.  1961     69.3
## 16        117.   555.       401.         283.       130.  1962     70.6
cars %>% plot(dist ~ speed, data = ., xlab="Speed", ylab="Stopping Distance")
cars %>% lm(dist ~ speed, data = .) %>% abline(col = "red")

shape_map <- c("setosa" = 1, "versicolor" = 2,  "virginica" = 3) 

iris %$% plot(Petal.Length ~ Petal.Width, pch = shape_map[Species], col = Species)

iris %$% plot(Sepal.Length ~ Sepal.Width, pch = shape_map[Species], col = Species)

#cars %>% qplot(speed, dist, data = .)
iris %>% qplot(Petal.Width, Petal.Length, color = Species, data = .)

iris %>% qplot(Sepal.Width, Sepal.Length, color = Species, data = .)

cars %>% qplot(speed, data = ., bins = 10) 

### Geometries
cars %>% qplot(speed, data = ., geom = "density")

cars %>% qplot (dist, data = ., geom = "density")

#cars %>% qplot(speed, dist, data = .)

ggplot(cars) + geom_point(aes(x = speed, y = dist))

#ggplot(cars, aes(x = speed, y = dist)) + geom_point()

#cars %>% ggplot(aes(x = speed, y = dist)) + geom_point()
iris %>% ggplot + geom_point(aes(x = Petal.Width, y = Petal.Length, color = Species))

iris %>% ggplot +
 geom_point(aes(x = Sepal.Width, y = Sepal.Length,
 color = Species))

iris %>% ggplot +
 geom_point(aes(x = Petal.Width, y = Petal.Length),
 color = "black") 

cars %>% ggplot + geom_histogram(aes(x = speed), bins = 10)

cars %>% ggplot + geom_density(aes(x = speed))

cars %>% ggplot + geom_histogram(aes(x=dist), bins = 10)

cars %>% ggplot + geom_density(aes(x=dist))

cars %>% ggplot(aes(x = speed, y = after_stat(count))) +
 geom_histogram(bins = 10) + geom_density()

cars %>% ggplot(aes(x = dist, y = after_stat(count))) + geom_histogram(bins = 10) + geom_density()

cars %>% ggplot(aes(x = speed, y = dist)) +
 geom_point() + geom_smooth(method = "lm", formula =  y ~ x)

cars %>% ggplot(aes(x = speed, y = dist)) +
 geom_point() + geom_smooth( method = "loess",formula =  y ~ x) 

longley %>% ggplot(aes(x = Year)) + geom_line(aes(y = Unemployed)) + geom_line(aes(y = Armed.Forces), color = "blue") 

longley %>% ggplot(aes(x = Year)) + geom_point(aes(y = Unemployed)) + geom_point(aes(y = Armed.Forces), color = "blue") + geom_line(aes(y = Unemployed)) + geom_line(aes(y = Armed.Forces), color = "blue") 

longley %>% gather(key, value, Unemployed, Armed.Forces) %>%
 ggplot(aes(x = Year, y = value, color = key)) + geom_line() + geom_point()

longley %>% gather(key, value, Unemployed, Armed.Forces) %>%
 ggplot(aes(x = Year, y = value)) + geom_line() +
 facet_grid(key ~ .)  #### Facet row-wise with lines

longley %>% gather(key, value, Unemployed, Armed.Forces) %>%
 ggplot(aes(x = Year, y = value)) + geom_point() +
 facet_grid(key ~ .) #### Facet row-wise with points

longley %>% gather(key, value, Unemployed, Armed.Forces) %>%
 ggplot(aes(x = Year, y = value)) + geom_line() +
 facet_grid(. ~ key  ) #### Facet column-wise with lines

longley %>% gather(key, value, Unemployed, Armed.Forces) %>%
 ggplot(aes(x = Year, y = value)) + geom_point() +
 facet_grid(. ~ key) #### Facet column-wise with points

iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value)) + geom_boxplot() +
 facet_grid(Measurement ~ .) # row-wise 

iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value)) + geom_boxplot() +
 facet_grid(. ~ Measurement ) # Column wise

iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value)) +geom_boxplot() +
 facet_grid(Measurement ~ ., scale = "free_y") 

label_map <- c(Petal.Width = "Petal Width",
 Petal.Length = "Petal Length",
 Sepal.Width = "Sepal Width",
 Sepal.Length = "Sepal Length")
 
iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value)) +
 geom_boxplot() + facet_grid(Measurement ~ ., scale = "free_y", labeller = labeller(Measurement = label_map))

### Scaling 
cars %>% ggplot(aes(x = speed, y = dist)) + geom_point() + geom_smooth(method = "lm", formula =  y ~ x) + scale_x_continuous("Speed") + scale_y_continuous("Stopping Distance") 

longley %>% gather(key, value, Unemployed, Armed.Forces) %>%
 ggplot(aes(x = Year, y = value)) + geom_line() + geom_point() + scale_x_continuous(breaks = 1947:1962) +
 facet_grid(key ~ .)

cars %>% ggplot(aes(x = speed, y = dist)) +
 geom_point() + geom_smooth(method = "lm", formula =  y ~ x) + scale_x_reverse("Speed") + scale_y_continuous("Stopping Distance") 

cars %>% ggplot(aes(x = speed, y = dist)) + geom_point() + geom_smooth(method = "lm", formula =  y ~ x) + scale_x_continuous("speed") + scale_y_reverse("Stopping Distance")

iris %>% ggplot(aes(x = Species, y = Petal.Length)) +
 geom_boxplot() + geom_jitter(width = 0.1, height = 0.1) 

iris %>% ggplot(aes(x = Species, y = Petal.Length)) +
 geom_boxplot() + geom_jitter(width = 0.1, height = 0.1) +
 scale_x_discrete(labels = c("setosa" = "Setosa",
 "versicolor" = "Versicolor", "virginica" = "Virginica")) 

iris %>% ggplot(aes(x = Species, y = Sepal.Length)) +
 geom_boxplot() + geom_jitter(width = 0.001, height = 0.001) + scale_x_discrete(labels = c("setosa" = "Setosa",
 "versicolor" = "Versicolor", "virginica" = "Virginica")) 

#??geom_jitter
iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value, fill = Species)) +
 geom_boxplot() + facet_grid(Measurement ~ ., scale = "free_y", labeller = labeller(Measurement = label_map))

iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value, fill = Species)) +
 geom_boxplot() + scale_fill_manual(values = c("red", "green", "blue")) + facet_grid(Measurement ~ ., scale = "free_y", labeller = labeller(Measurement = label_map)) 

iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value, fill = Species)) +
 geom_boxplot() + scale_fill_brewer(palette = "Greens") +
 facet_grid(Measurement ~ ., scale = "free_y", labeller = labeller(Measurement = label_map)) 

#theme_set(theme_bw(2)) 

#old <- theme_set(theme_bw())
#theme_set(old)
#theme_replace(panel.grid.minor = element_line(colour = "red"))
iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value, fill = Species)) +
 geom_boxplot() + scale_x_discrete(labels = c("setosa" = "Setosa", "versicolor" = "Versicolor", "virginica" = "Virginica")) + scale_fill_brewer(palette = "Reds") +
 facet_grid(Measurement ~ ., switch = "y", labeller = labeller(Measurement = label_map)) + coord_flip() 

iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value, fill = Species)) +
 geom_boxplot() + scale_x_discrete(labels = c("setosa" = "Setosa", "versicolor" = "Versicolor", "virginica" = "Virginica")) + scale_fill_brewer(palette = "Blues") +
 facet_grid(Measurement ~ ., switch = "y", labeller = labeller(Measurement = label_map)) + coord_flip()

iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value, fill = Species)) +
 geom_boxplot() + scale_x_discrete(labels = c("setosa" = "Setosa", "versicolor" = "Versicolor", "virginica" = "Virginica")) + scale_fill_brewer(palette = "Greens") +
 facet_grid(Measurement ~ ., switch = "y", labeller = labeller(Measurement = label_map)) + coord_flip() + theme(strip.background = element_blank()) + theme(legend.position ="top")

label_map <- c(Petal.Width = "Petal Width",Petal.Length = "Petal Length",Sepal.Width = "Sepal Width",Sepal.Length = "Sepal Length")
 
species_map <- c(setosa = "Setosa",versicolor = "Versicolor",virginica = "Virginica")
 
iris %>% gather(Measurement, Value, -Species) %>%
 ggplot(aes(x = Species, y = Value, fill = Species)) +
 geom_boxplot() + scale_x_discrete(labels = species_map) +
 scale_fill_brewer(palette = "Greens", labels = species_map) + facet_grid(Measurement ~ ., switch = "y",
 labeller = labeller(Measurement = label_map)) +
 coord_flip() + theme(strip.background = element_blank()) +
 theme(legend.position="bottom") 

### Figures with Multiple Plots
petal <- iris %>% ggplot() +
 geom_point(aes(x = Petal.Width, y = Petal.Length,
 color = Species)) + theme(legend.position = "none")
 
sepal <- iris %>% ggplot() +
 geom_point(aes(x = Sepal.Width, y = Sepal.Length,
 color = Species)) + theme(legend.position = "none")
 
library(gridExtra)
grid.arrange(petal, sepal, nrow = 2, ncol = 1)

grid.arrange(petal, sepal, nrow = 1, ncol= 2)

library(cowplot)
plot_grid(petal, sepal, labels = c("A", "B"))

## **Chapter 5** 
### Working With Larges Data sets
iris %>% sample_n(size = 10)
##    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1           5.3         3.7          1.5         0.2     setosa
## 2           4.4         3.0          1.3         0.2     setosa
## 3           6.3         3.3          6.0         2.5  virginica
## 4           4.4         3.2          1.3         0.2     setosa
## 5           6.9         3.1          5.4         2.1  virginica
## 6           5.0         3.5          1.6         0.6     setosa
## 7           6.0         2.9          4.5         1.5 versicolor
## 8           5.5         2.3          4.0         1.3 versicolor
## 9           5.1         3.8          1.5         0.3     setosa
## 10          6.5         2.8          4.6         1.5 versicolor
iris %>% sample_frac(size = 0.02) 
##   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1          4.4         3.2          1.3         0.2     setosa
## 2          5.6         2.7          4.2         1.3 versicolor
## 3          5.7         2.5          5.0         2.0  virginica
library(pryr) 
mem_change(x <- rnorm(10000)) 
## 81.3 kB
mem_change(x[1] <- 0) 
## 736 B
mem_change(y <- x) 
## -120 B
mem_change(x[1] <- 0) 
## 80.8 kB
d <- data.frame(x = rnorm(10000), y = rnorm(10000))

d %>% ggplot(aes(x = x, y = y)) + geom_point(alpha = 0.2)

mem_change(d)
## 528 B
d %>% ggplot(aes(x = x, y = y)) + geom_density_2d()

mem_change(d)
## -720 B
#install.packages("hexbin")
d %>% ggplot(aes(x = x, y = y)) + geom_hex() + scale_fill_gradient(low = "lightgray", high = "red") + geom_density2d(color = "black")

d %>% ggplot(aes(x = x, y = y)) + geom_hex()

library(ff)

ffcars <- as.ffdf(cars)
summary(ffcars)
##       Length Class     Mode
## speed 50     ff_vector list
## dist  50     ff_vector list
ffiris <- as.ffdf(iris)
summary(ffiris)
##              Length Class     Mode
## Sepal.Length 150    ff_vector list
## Sepal.Width  150    ff_vector list
## Petal.Length 150    ff_vector list
## Petal.Width  150    ff_vector list
## Species      150    ff_vector list
library(biglm)

model <- biglm(dist ~ speed, data = ffcars)
summary(model) 
## Large data regression model: biglm(dist ~ speed, data = ffcars)
## Sample size =  50 
##                 Coef     (95%     CI)     SE      p
## (Intercept) -17.5791 -31.0960 -4.0622 6.7584 0.0093
## speed         3.9324   3.1014  4.7634 0.4155 0.0000
iris_db <- src_sqlite("iris_db.sqlite3", create = TRUE)

iris_sqlite <- tbl(iris_db, "iris", temporary = FALSE)

iris_sqlite %>% group_by(Species) %>%
 summarise(mean.Petal.Length = mean(Petal.Length))
## # Source:   SQL [?? x 2]
## # Database: sqlite 3.47.1 [C:\Users\HP\Desktop\Data Science\Charles_Nana\iris_db.sqlite3]
##   Species    mean.Petal.Length
##   <chr>                  <dbl>
## 1 setosa                  1.46
## 2 versicolor              4.26
## 3 virginica               5.55
## **Chapter 6** 
## **Supervise Machine Learning**
### Linear Regression
cars %>% head()
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
cars %>% ggplot(aes(x = speed, y = dist)) + geom_point() + geom_smooth(method = "lm", formula = y ~ x) 

cars %>% lm(dist ~ speed, data = .) %>% summary 
## 
## Call:
## lm(formula = dist ~ speed, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *  
## speed         3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12
cars %>% lm(dist ~ speed, data = .) %>% coefficients 
## (Intercept)       speed 
##  -17.579095    3.932409
cars %>% lm(dist ~ speed, data = .) %>% confint 
##                  2.5 %    97.5 %
## (Intercept) -31.167850 -3.990340
## speed         3.096964  4.767853
predict_dist <- function(speed, theta_1)
 data.frame(speed = speed,
 dist = theta_1 * speed,
 theta = as.factor(theta_1))

cars %>% ggplot(aes(x = speed, y = dist, colour = theta)) +
 geom_point(colour = "black") +
 geom_line(data = predict_dist(cars$speed, 2)) +
 geom_line(data = predict_dist(cars$speed, 3)) +
 geom_line(data = predict_dist(cars$speed, 4)) +
 scale_color_discrete(name = expression(theta[1]))

thetas <- seq(0, 5, length.out = 50)

fitting_error <- Vectorize(function(theta)
 sum((theta * cars$speed - cars$dist)**2)
) 

data.frame(thetas = thetas, errors = fitting_error(thetas)) %>%
 ggplot(aes(x = thetas, y = errors)) +
 geom_line() +
 xlab(expression(theta[1])) + ylab("") 

cars %>% lm(dist ~ speed - 1, data = .) %>% coefficients
##    speed 
## 2.909132
cars %>% ggplot(aes(x = speed, y = dist)) +
 geom_point() +
 geom_smooth(method = "lm", formula = y ~ x - 1)

### Logistic Regression
data("BreastCancer")
BreastCancer %>% head
##        Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
## 1 1000025            5         1          1             1            2
## 2 1002945            5         4          4             5            7
## 3 1015425            3         1          1             1            2
## 4 1016277            6         8          8             1            3
## 5 1017023            4         1          1             3            2
## 6 1017122            8        10         10             8            7
##   Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses     Class
## 1           1           3               1       1    benign
## 2          10           3               2       1    benign
## 3           2           3               1       1    benign
## 4           4           3               7       1    benign
## 5           1           3               1       1    benign
## 6          10           9               7       1 malignant
BreastCancer %>% ggplot(aes(x = Cl.thickness, y = Class)) +
 geom_jitter(height = 0.05, width = 0.3, alpha = 0.4)

#data("BreastCancer")
#BreastCancer %>% sample_frac(0.4) %>% head
BreastCancer %>% ggplot(aes(x = Cell.size, y = Class)) +
geom_jitter(height = 0.05, width = .3, alpha = 0.4)

BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 ggplot(aes(x = Cl.thickness.numeric, y = IsMalignant)) + geom_jitter(height = 0.05, width = 0.3, alpha=0.4) +
 geom_smooth(method = "glm", formula = y ~ x, 
 method.args = list(family = "binomial")) 

BreastCancer %>% mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 glm(IsMalignant ~ Cl.thickness.numeric,
 family = "binomial", data = .) 
## 
## Call:  glm(formula = IsMalignant ~ Cl.thickness.numeric, family = "binomial", 
##     data = .)
## 
## Coefficients:
##          (Intercept)  Cl.thickness.numeric  
##              -5.1602                0.9355  
## 
## Degrees of Freedom: 698 Total (i.e. Null);  697 Residual
## Null Deviance:       900.5 
## Residual Deviance: 464.1     AIC: 468.1
### Model Matrice and Formula
cars %>% model.matrix(dist ~ speed, data = .) %>% head(5)
##   (Intercept) speed
## 1           1     4
## 2           1     4
## 3           1     7
## 4           1     7
## 5           1     8
cars %>% model.matrix(dist ~ speed - 1, data = .) %>% head(5) 
##   speed
## 1     4
## 2     4
## 3     7
## 4     7
## 5     8
BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness)),
 Cell.size.numeric =
 as.numeric(as.character(Cell.size))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 model.matrix(IsMalignant ~ Cl.thickness.numeric + Cell.size.numeric,
 data = .) %>% head() 
##   (Intercept) Cl.thickness.numeric Cell.size.numeric
## 1           1                    5                 1
## 2           1                    5                 4
## 3           1                    3                 1
## 4           1                    6                 8
## 5           1                    4                 1
## 6           1                    8                10
BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness)),
 Cell.size.numeric =
 as.numeric(as.character(Cell.size))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 model.matrix(IsMalignant ~ Cl.thickness.numeric + Cell.size.numeric - 1,
 data = .) %>% head() 
##   Cl.thickness.numeric Cell.size.numeric
## 1                    5                 1
## 2                    5                 4
## 3                    3                 1
## 4                    6                 8
## 5                    4                 1
## 6                    8                10
BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness)),
 Cell.size.numeric =
 as.numeric(as.character(Cell.size))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 glm(IsMalignant ~ Cl.thickness.numeric + Cell.size.numeric - 1, family = "binomial", data = .)
## 
## Call:  glm(formula = IsMalignant ~ Cl.thickness.numeric + Cell.size.numeric - 
##     1, family = "binomial", data = .)
## 
## Coefficients:
## Cl.thickness.numeric     Cell.size.numeric  
##              -0.3182                0.5624  
## 
## Degrees of Freedom: 699 Total (i.e. Null);  697 Residual
## Null Deviance:       969 
## Residual Deviance: 798.8     AIC: 802.8
 BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness)),
 Cell.size.numeric =
 as.numeric(as.character(Cell.size))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 glm(IsMalignant ~ Cl.thickness.numeric + Cell.size.numeric,
 family = "binomial", data = .) 
## 
## Call:  glm(formula = IsMalignant ~ Cl.thickness.numeric + Cell.size.numeric, 
##     family = "binomial", data = .)
## 
## Coefficients:
##          (Intercept)  Cl.thickness.numeric     Cell.size.numeric  
##              -7.1517                0.6174                1.1751  
## 
## Degrees of Freedom: 698 Total (i.e. Null);  696 Residual
## Null Deviance:       900.5 
## Residual Deviance: 212.3     AIC: 218.3
BreastCancer %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 model.matrix(IsMalignant ~ Bare.nuclei, data = .) %>%
 head() 
##   (Intercept) Bare.nuclei2 Bare.nuclei3 Bare.nuclei4 Bare.nuclei5 Bare.nuclei6
## 1           1            0            0            0            0            0
## 2           1            0            0            0            0            0
## 3           1            1            0            0            0            0
## 4           1            0            0            1            0            0
## 5           1            0            0            0            0            0
## 6           1            0            0            0            0            0
##   Bare.nuclei7 Bare.nuclei8 Bare.nuclei9 Bare.nuclei10
## 1            0            0            0             0
## 2            0            0            0             1
## 3            0            0            0             0
## 4            0            0            0             0
## 5            0            0            0             0
## 6            0            0            0             1
BreastCancer %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 model.matrix(IsMalignant ~ Cl.thickness, data = .) %>%
 head() 
##   (Intercept) Cl.thickness.L Cl.thickness.Q Cl.thickness.C Cl.thickness^4
## 1           1    -0.05504819    -0.34815531      0.1295501     0.33658092
## 2           1    -0.05504819    -0.34815531      0.1295501     0.33658092
## 3           1    -0.27524094    -0.08703883      0.3778543    -0.31788198
## 4           1     0.05504819    -0.34815531     -0.1295501     0.33658092
## 5           1    -0.16514456    -0.26111648      0.3346710     0.05609682
## 6           1     0.27524094    -0.08703883     -0.3778543    -0.31788198
##   Cl.thickness^5 Cl.thickness^6 Cl.thickness^7 Cl.thickness^8 Cl.thickness^9
## 1    -0.21483446     -0.3113996      0.3278724      0.2617852     -0.5714300
## 2    -0.21483446     -0.3113996      0.3278724      0.2617852     -0.5714300
## 3    -0.03580574      0.3892495     -0.5035184      0.3739788     -0.1632657
## 4     0.21483446     -0.3113996     -0.3278724      0.2617852      0.5714300
## 5    -0.39386318      0.2335497      0.2459043     -0.5235703      0.3809534
## 6     0.03580574      0.3892495      0.5035184      0.3739788      0.1632657
BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness)),Cell.size.numeric =
 as.numeric(as.character(Cell.size))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 model.matrix(IsMalignant ~ Cl.thickness.numeric * Cell.size.numeric, data = .) %>% head() 
##   (Intercept) Cl.thickness.numeric Cell.size.numeric
## 1           1                    5                 1
## 2           1                    5                 4
## 3           1                    3                 1
## 4           1                    6                 8
## 5           1                    4                 1
## 6           1                    8                10
##   Cl.thickness.numeric:Cell.size.numeric
## 1                                      5
## 2                                     20
## 3                                      3
## 4                                     48
## 5                                      4
## 6                                     80
BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 model.matrix(IsMalignant ~ Cl.thickness.numeric * Bare.nuclei, data = .) %>% head() 
##   (Intercept) Cl.thickness.numeric Bare.nuclei2 Bare.nuclei3 Bare.nuclei4
## 1           1                    5            0            0            0
## 2           1                    5            0            0            0
## 3           1                    3            1            0            0
## 4           1                    6            0            0            1
## 5           1                    4            0            0            0
## 6           1                    8            0            0            0
##   Bare.nuclei5 Bare.nuclei6 Bare.nuclei7 Bare.nuclei8 Bare.nuclei9
## 1            0            0            0            0            0
## 2            0            0            0            0            0
## 3            0            0            0            0            0
## 4            0            0            0            0            0
## 5            0            0            0            0            0
## 6            0            0            0            0            0
##   Bare.nuclei10 Cl.thickness.numeric:Bare.nuclei2
## 1             0                                 0
## 2             1                                 0
## 3             0                                 3
## 4             0                                 0
## 5             0                                 0
## 6             1                                 0
##   Cl.thickness.numeric:Bare.nuclei3 Cl.thickness.numeric:Bare.nuclei4
## 1                                 0                                 0
## 2                                 0                                 0
## 3                                 0                                 0
## 4                                 0                                 6
## 5                                 0                                 0
## 6                                 0                                 0
##   Cl.thickness.numeric:Bare.nuclei5 Cl.thickness.numeric:Bare.nuclei6
## 1                                 0                                 0
## 2                                 0                                 0
## 3                                 0                                 0
## 4                                 0                                 0
## 5                                 0                                 0
## 6                                 0                                 0
##   Cl.thickness.numeric:Bare.nuclei7 Cl.thickness.numeric:Bare.nuclei8
## 1                                 0                                 0
## 2                                 0                                 0
## 3                                 0                                 0
## 4                                 0                                 0
## 5                                 0                                 0
## 6                                 0                                 0
##   Cl.thickness.numeric:Bare.nuclei9 Cl.thickness.numeric:Bare.nuclei10
## 1                                 0                                  0
## 2                                 0                                  5
## 3                                 0                                  0
## 4                                 0                                  0
## 5                                 0                                  0
## 6                                 0                                  8
BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1)) %>%
 model.matrix(IsMalignant ~ Cl.thickness.numeric : Bare.nuclei, data = .) %>% head() 
##   (Intercept) Cl.thickness.numeric:Bare.nuclei1
## 1           1                                 5
## 2           1                                 0
## 3           1                                 0
## 4           1                                 0
## 5           1                                 4
## 6           1                                 0
##   Cl.thickness.numeric:Bare.nuclei2 Cl.thickness.numeric:Bare.nuclei3
## 1                                 0                                 0
## 2                                 0                                 0
## 3                                 3                                 0
## 4                                 0                                 0
## 5                                 0                                 0
## 6                                 0                                 0
##   Cl.thickness.numeric:Bare.nuclei4 Cl.thickness.numeric:Bare.nuclei5
## 1                                 0                                 0
## 2                                 0                                 0
## 3                                 0                                 0
## 4                                 6                                 0
## 5                                 0                                 0
## 6                                 0                                 0
##   Cl.thickness.numeric:Bare.nuclei6 Cl.thickness.numeric:Bare.nuclei7
## 1                                 0                                 0
## 2                                 0                                 0
## 3                                 0                                 0
## 4                                 0                                 0
## 5                                 0                                 0
## 6                                 0                                 0
##   Cl.thickness.numeric:Bare.nuclei8 Cl.thickness.numeric:Bare.nuclei9
## 1                                 0                                 0
## 2                                 0                                 0
## 3                                 0                                 0
## 4                                 0                                 0
## 5                                 0                                 0
## 6                                 0                                 0
##   Cl.thickness.numeric:Bare.nuclei10
## 1                                  0
## 2                                  5
## 3                                  0
## 4                                  0
## 5                                  0
## 6                                  8
cars %>% model.matrix(dist ~ speed + speed^2, data = .) %>% head 
##   (Intercept) speed
## 1           1     4
## 2           1     4
## 3           1     7
## 4           1     7
## 5           1     8
## 6           1     9
### Polynomial function
cars %>% model.matrix(dist ~ speed + I(speed^2), data = .) %>% head 
##   (Intercept) speed I(speed^2)
## 1           1     4         16
## 2           1     4         16
## 3           1     7         49
## 4           1     7         49
## 5           1     8         64
## 6           1     9         81
cars %>% lm(dist ~ speed + I(speed^2), data = .) %>%
 summary
## 
## Call:
## lm(formula = dist ~ speed + I(speed^2), data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.720  -9.184  -3.188   4.628  45.152 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.47014   14.81716   0.167    0.868
## speed        0.91329    2.03422   0.449    0.656
## I(speed^2)   0.09996    0.06597   1.515    0.136
## 
## Residual standard error: 15.18 on 47 degrees of freedom
## Multiple R-squared:  0.6673, Adjusted R-squared:  0.6532 
## F-statistic: 47.14 on 2 and 47 DF,  p-value: 5.852e-12
cars %>% ggplot(aes(x = speed, y = dist)) + geom_point() +
 geom_smooth(method = "lm", formula = y ~ x + I(x^2))

iris %>% model.matrix(Sepal.Width ~ Sepal.Length, data = .) %>% head
##   (Intercept) Sepal.Length
## 1           1          5.1
## 2           1          4.9
## 3           1          4.7
## 4           1          4.6
## 5           1          5.0
## 6           1          5.4
iris %>% model.matrix(Sepal.Width ~ Sepal.Length + I(Sepal.Length^2), data=.) %>% head()
##   (Intercept) Sepal.Length I(Sepal.Length^2)
## 1           1          5.1             26.01
## 2           1          4.9             24.01
## 3           1          4.7             22.09
## 4           1          4.6             21.16
## 5           1          5.0             25.00
## 6           1          5.4             29.16
iris %>% lm(Sepal.Width ~ Sepal.Length + I(Sepal.Length^2), data = .) %>% summary
## 
## Call:
## lm(formula = Sepal.Width ~ Sepal.Length + I(Sepal.Length^2), 
##     data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13070 -0.26310 -0.02446  0.25728  1.38725 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.41584    1.58499   4.048 8.33e-05 ***
## Sepal.Length      -1.08556    0.53625  -2.024   0.0447 *  
## I(Sepal.Length^2)  0.08571    0.04476   1.915   0.0574 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4304 on 147 degrees of freedom
## Multiple R-squared:  0.03783,    Adjusted R-squared:  0.02474 
## F-statistic:  2.89 on 2 and 147 DF,  p-value: 0.05877
iris %>% ggplot(aes(x = Sepal.Width, y = Sepal.Length)) + geom_point( color = "lightgreen") +
 geom_smooth(method = "lm", formula = y ~ x + I(x^2))

### Validating Models
line <- cars %>% lm(dist ~ speed, data = .)

poly <- cars %>% lm(dist ~ speed + I(speed^2), data = .)

predict(line, cars) %>% head
##         1         2         3         4         5         6 
## -1.849460 -1.849460  9.947766  9.947766 13.880175 17.812584
predict(poly, cars) %>% head 
##         1         2         3         4         5         6 
##  7.722637  7.722637 13.761157 13.761157 16.173834 18.786430
### Evaluating Regression Models
rmse <- function(x,t) sqrt(mean(sum((t - x)^2)))
rmse(predict(line, cars), cars$dist)
## [1] 106.5529
rmse(predict(poly, cars), cars$dist)
## [1] 104.0419
training_data <- cars[1:25,]
test_data <- cars[26:50,] 
line <- training_data %>% lm(dist ~ speed, data = .)
poly <- training_data %>% lm(dist ~ speed + I(speed^2), data = .) 
rmse(predict(line, test_data), test_data$dist) 
## [1] 88.89189
rmse(predict(poly, test_data), test_data$dist) 
## [1] 83.84263
sampled_cars <- cars %>% mutate(training = sample(0:1, nrow(cars), replace = TRUE))
sampled_cars %>% head 
##   speed dist training
## 1     4    2        0
## 2     4   10        0
## 3     7    4        1
## 4     7   22        0
## 5     8   16        0
## 6     9   10        1
training_data <- sampled_cars %>% filter(training == 1)
test_data <- sampled_cars %>% filter(training == 0)
training_data %>% head 
##   speed dist training
## 1     7    4        1
## 2     9   10        1
## 3    10   26        1
## 4    11   17        1
## 5    11   28        1
## 6    12   20        1
test_data %>% head 
##   speed dist training
## 1     4    2        0
## 2     4   10        0
## 3     7   22        0
## 4     8   16        0
## 5    10   18        0
## 6    10   34        0
line <- training_data %>% lm(dist ~ speed, data = .)
poly <- training_data %>% lm(dist ~ speed + I(speed^2), data = .) 
rmse(predict(line, test_data), test_data$dist) 
## [1] 93.54925
rmse(predict(poly, test_data), test_data$dist) 
## [1] 87.80469
### Evaluating Classification Models
formatted_data <- BreastCancer %>%
 mutate(Cl.thickness.numeric =
 as.numeric(as.character(Cl.thickness)),
 Cell.size.numeric =
 as.numeric(as.character(Cell.size))) %>%
 mutate(IsMalignant = ifelse(Class == "benign", 0, 1))
 
 fitted_model <- formatted_data %>%
 glm(IsMalignant ~ Cl.thickness.numeric + Cell.size.numeric, data = .)

predict(fitted_model, formatted_data, type = "response") %>% head 
##          1          2          3          4          5          6 
## 0.17365879 0.45878552 0.06461445 0.89347668 0.11913662 1.19260550
### Confusion Matrix
classify <- function(probability) ifelse(probability < 0.5, 0, 1)
classified_malignant <- classify(predict(fitted_model, formatted_data)) 
table(formatted_data$IsMalignant, classified_malignant) 
##    classified_malignant
##       0   1
##   0 450   8
##   1  42 199
table(formatted_data$IsMalignant, classified_malignant,
 dnn = c("Data", "Predictions"))
##     Predictions
## Data   0   1
##    0 450   8
##    1  42 199
classify <- function(probability)
 ifelse(probability < 0.5, "benign", "malignant")

classified <- classify(predict(fitted_model, formatted_data))
table(formatted_data$Class, classified, dnn = c("Data", "Predictions")) 
##            Predictions
## Data        benign malignant
##   benign       450         8
##   malignant     42       199
### Accuracy
confusion_matrix <- table(formatted_data$Class, classified,dnn = c("Data", "Predictions"))

(accuracy <- sum(diag(confusion_matrix))/sum(confusion_matrix))
## [1] 0.9284692
#table(BreastCancer$Class) 
tbl <- table(BreastCancer$Class)
tbl["benign"] / sum(tbl)
##    benign 
## 0.6552217
mbl = table (BreastCancer$Class)
mbl["malignant"] / sum(mbl)
## malignant 
## 0.3447783
table(BreastCancer$Class, sample(BreastCancer$Class)) # random sampling
##            
##             benign malignant
##   benign       296       162
##   malignant    162        79
#replicate(8, acc(table(BreastCancer$Class,sample(BreastCancer$Class))))

#(accuracy(table(BreastCancer$Class,sample(BreastCancer$Class))))
### Sensitivity and Specificity
(specificity <- confusion_matrix[1,1]/
 (confusion_matrix[1,1] + confusion_matrix[1,2])) # negative class
## [1] 0.9825328
(sensitivity <- confusion_matrix[2,2]/
 (confusion_matrix[2,1] + confusion_matrix[2,2])) # positive class
## [1] 0.8257261
specificity <- function(confusion_matrix)
 confusion_matrix[1,1]/(confusion_matrix[1,1] + confusion_matrix[1,2])
 
sensitivity <- function(confusion_matrix)
 confusion_matrix[2,2]/(confusion_matrix[2,1] + confusion_matrix[2,2])
 
prediction_summary <- function(confusion_matrix)
 c("accuracy" = accuracy(confusion_matrix),
 "specificity" = specificity(confusion_matrix),
 "sensitivity" = sensitivity(confusion_matrix))
 
random_prediction_summary <- function()
 prediction_summary(table(BreastCancer$Class,
 sample(BreastCancer$Class)))
 
#replicate(3, random_prediction_summary())
### Other Measures
confusion_matrix[2,1] / sum(confusion_matrix[,1])
## [1] 0.08536585
confusion_matrix[1,1] / sum(confusion_matrix[,1]) 
## [1] 0.9146341
confusion_matrix[2,2] / sum(confusion_matrix[,2]) 
## [1] 0.9613527
confusion_matrix[1,2] / sum(confusion_matrix[,2]) 
## [1] 0.03864734
### Random Permutation of Data
permuted_cars <- cars[sample(1:nrow(cars)),]
permuted_cars %>% head()
##    speed dist
## 39    20   32
## 8     10   26
## 30    17   40
## 6      9   10
## 18    13   34
## 5      8   16
permute_rows <- function(df) df[sample(1:nrow(df)),]
permuted_cars <- cars %>% permute_rows

group_data <- function(df, n) {
 groups <- rep(1:n, each = nrow(df)/n)
 split(df, groups)
}
cars %>% permute_rows %>% group_data(5) %>% head(1)
## $`1`
##    speed dist
## 9     10   34
## 49    24  120
## 7     10   18
## 2      4   10
## 25    15   26
## 4      7   22
## 20    14   26
## 14    12   24
## 5      8   16
## 11    11   28
grouped_cars <- cars %>% permute_rows %>% group_data(5)
grouped_cars[[1]] 
##    speed dist
## 33    18   56
## 40    20   48
## 6      9   10
## 43    20   64
## 34    18   76
## 37    19   46
## 3      7    4
## 25    15   26
## 13    12   20
## 18    13   34
grouped_cars[1] 
## $`1`
##    speed dist
## 33    18   56
## 40    20   48
## 6      9   10
## 43    20   64
## 34    18   76
## 37    19   46
## 3      7    4
## 25    15   26
## 13    12   20
## 18    13   34
(grouped_cars[[1]] %>%
 lm(dist ~ speed, data = .) %>%.$coefficients)
## (Intercept)       speed 
##   -29.20874     4.47740
qplot(grouped_cars[[1]] %>%lm(dist ~ speed, data = .) %>%.$coefficients)

estimates <- grouped_cars[[1]] %>%
 lm(dist ~ speed, data = .) %>%.$coefficients

for (i in 2:length(grouped_cars)) {
 group_estimates <- grouped_cars[[i]] %>%
 lm(dist ~ speed, data = .) %>%
 .$coefficients
 estimates <- rbind(estimates, group_estimates)
}
estimates 
##                 (Intercept)    speed
## estimates        -29.208735 4.477400
## group_estimates  -17.539144 4.614327
## group_estimates   16.330709 2.244094
## group_estimates   -9.499446 2.892580
## group_estimates  -20.833333 3.989035
library(purrr) 

estimates <- grouped_cars %>%
 map(. %>% lm(dist ~ speed, data = .) %>% .$coefficients)
 
estimates 
## $`1`
## (Intercept)       speed 
##   -29.20874     4.47740 
## 
## $`2`
## (Intercept)       speed 
##  -17.539144    4.614327 
## 
## $`3`
## (Intercept)       speed 
##   16.330709    2.244094 
## 
## $`4`
## (Intercept)       speed 
##   -9.499446    2.892580 
## 
## $`5`
## (Intercept)       speed 
##  -20.833333    3.989035
estimates <- grouped_cars %>%
 map(. %>% lm(dist ~ speed, data = .) %>% .$coefficients) %>% do.call("rbind", .)
 
estimates
##   (Intercept)    speed
## 1  -29.208735 4.477400
## 2  -17.539144 4.614327
## 3   16.330709 2.244094
## 4   -9.499446 2.892580
## 5  -20.833333 3.989035
### Cross Validation
cross_validation_groups <- function(grouped_df) {
 result <- vector(mode = "list", length = length(grouped_df))
 for (i in seq_along(grouped_df)) {
 result[[i]] <- grouped_df[-i] %>% do.call("rbind", .)
 }
 result
} 

cars %>% permute_rows %>% group_data(5) %>% cross_validation_groups %>% map(. %>% lm(dist ~ speed, data = .) %>% .$coefficients) %>% do.call("rbind", .)
##      (Intercept)    speed
## [1,]   -19.21612 4.209059
## [2,]   -25.09475 4.375456
## [3,]   -16.60570 3.894515
## [4,]   -17.04083 3.752234
## [5,]   -11.50681 3.509767
cross_validation_split <- function(grouped_df) {
 result <- vector(mode = "list", length = length(grouped_df))
 for (i in seq_along(grouped_df)) {
 training <- grouped_df[-i] %>% do.call("rbind", .)
 test <- grouped_df[[i]]
 result[[i]] <- list(training = training, test = test)
 }
 result
}

cars %>%
 permute_rows %>%
 group_data(5) %>%
 cross_validation_split
## [[1]]
## [[1]]$training
##      speed dist
## 2.31    17   50
## 2.8     10   26
## 2.37    19   46
## 2.5      8   16
## 2.23    14   80
## 2.39    20   32
## 2.50    25   85
## 2.34    18   76
## 2.15    12   28
## 2.22    14   60
## 3.11    11   28
## 3.44    22   66
## 3.21    14   36
## 3.33    18   56
## 3.40    20   48
## 3.2      4   10
## 3.12    12   14
## 3.14    12   24
## 3.47    24   92
## 3.32    18   42
## 4.4      7   22
## 4.20    14   26
## 4.19    13   46
## 4.38    19   68
## 4.18    13   34
## 4.46    24   70
## 4.6      9   10
## 4.13    12   20
## 4.42    20   56
## 4.1      4    2
## 5.43    20   64
## 5.7     10   18
## 5.10    11   17
## 5.30    17   40
## 5.17    13   34
## 5.35    18   84
## 5.48    24   93
## 5.24    15   20
## 5.16    13   26
## 5.45    23   54
## 
## [[1]]$test
##    speed dist
## 25    15   26
## 27    16   32
## 26    15   54
## 49    24  120
## 3      7    4
## 41    20   52
## 29    17   32
## 28    16   40
## 9     10   34
## 36    19   36
## 
## 
## [[2]]
## [[2]]$training
##      speed dist
## 1.25    15   26
## 1.27    16   32
## 1.26    15   54
## 1.49    24  120
## 1.3      7    4
## 1.41    20   52
## 1.29    17   32
## 1.28    16   40
## 1.9     10   34
## 1.36    19   36
## 3.11    11   28
## 3.44    22   66
## 3.21    14   36
## 3.33    18   56
## 3.40    20   48
## 3.2      4   10
## 3.12    12   14
## 3.14    12   24
## 3.47    24   92
## 3.32    18   42
## 4.4      7   22
## 4.20    14   26
## 4.19    13   46
## 4.38    19   68
## 4.18    13   34
## 4.46    24   70
## 4.6      9   10
## 4.13    12   20
## 4.42    20   56
## 4.1      4    2
## 5.43    20   64
## 5.7     10   18
## 5.10    11   17
## 5.30    17   40
## 5.17    13   34
## 5.35    18   84
## 5.48    24   93
## 5.24    15   20
## 5.16    13   26
## 5.45    23   54
## 
## [[2]]$test
##    speed dist
## 31    17   50
## 8     10   26
## 37    19   46
## 5      8   16
## 23    14   80
## 39    20   32
## 50    25   85
## 34    18   76
## 15    12   28
## 22    14   60
## 
## 
## [[3]]
## [[3]]$training
##      speed dist
## 1.25    15   26
## 1.27    16   32
## 1.26    15   54
## 1.49    24  120
## 1.3      7    4
## 1.41    20   52
## 1.29    17   32
## 1.28    16   40
## 1.9     10   34
## 1.36    19   36
## 2.31    17   50
## 2.8     10   26
## 2.37    19   46
## 2.5      8   16
## 2.23    14   80
## 2.39    20   32
## 2.50    25   85
## 2.34    18   76
## 2.15    12   28
## 2.22    14   60
## 4.4      7   22
## 4.20    14   26
## 4.19    13   46
## 4.38    19   68
## 4.18    13   34
## 4.46    24   70
## 4.6      9   10
## 4.13    12   20
## 4.42    20   56
## 4.1      4    2
## 5.43    20   64
## 5.7     10   18
## 5.10    11   17
## 5.30    17   40
## 5.17    13   34
## 5.35    18   84
## 5.48    24   93
## 5.24    15   20
## 5.16    13   26
## 5.45    23   54
## 
## [[3]]$test
##    speed dist
## 11    11   28
## 44    22   66
## 21    14   36
## 33    18   56
## 40    20   48
## 2      4   10
## 12    12   14
## 14    12   24
## 47    24   92
## 32    18   42
## 
## 
## [[4]]
## [[4]]$training
##      speed dist
## 1.25    15   26
## 1.27    16   32
## 1.26    15   54
## 1.49    24  120
## 1.3      7    4
## 1.41    20   52
## 1.29    17   32
## 1.28    16   40
## 1.9     10   34
## 1.36    19   36
## 2.31    17   50
## 2.8     10   26
## 2.37    19   46
## 2.5      8   16
## 2.23    14   80
## 2.39    20   32
## 2.50    25   85
## 2.34    18   76
## 2.15    12   28
## 2.22    14   60
## 3.11    11   28
## 3.44    22   66
## 3.21    14   36
## 3.33    18   56
## 3.40    20   48
## 3.2      4   10
## 3.12    12   14
## 3.14    12   24
## 3.47    24   92
## 3.32    18   42
## 5.43    20   64
## 5.7     10   18
## 5.10    11   17
## 5.30    17   40
## 5.17    13   34
## 5.35    18   84
## 5.48    24   93
## 5.24    15   20
## 5.16    13   26
## 5.45    23   54
## 
## [[4]]$test
##    speed dist
## 4      7   22
## 20    14   26
## 19    13   46
## 38    19   68
## 18    13   34
## 46    24   70
## 6      9   10
## 13    12   20
## 42    20   56
## 1      4    2
## 
## 
## [[5]]
## [[5]]$training
##      speed dist
## 1.25    15   26
## 1.27    16   32
## 1.26    15   54
## 1.49    24  120
## 1.3      7    4
## 1.41    20   52
## 1.29    17   32
## 1.28    16   40
## 1.9     10   34
## 1.36    19   36
## 2.31    17   50
## 2.8     10   26
## 2.37    19   46
## 2.5      8   16
## 2.23    14   80
## 2.39    20   32
## 2.50    25   85
## 2.34    18   76
## 2.15    12   28
## 2.22    14   60
## 3.11    11   28
## 3.44    22   66
## 3.21    14   36
## 3.33    18   56
## 3.40    20   48
## 3.2      4   10
## 3.12    12   14
## 3.14    12   24
## 3.47    24   92
## 3.32    18   42
## 4.4      7   22
## 4.20    14   26
## 4.19    13   46
## 4.38    19   68
## 4.18    13   34
## 4.46    24   70
## 4.6      9   10
## 4.13    12   20
## 4.42    20   56
## 4.1      4    2
## 
## [[5]]$test
##    speed dist
## 43    20   64
## 7     10   18
## 10    11   17
## 30    17   40
## 17    13   34
## 35    18   84
## 48    24   93
## 24    15   20
## 16    13   26
## 45    23   54
prediction_accuracy_cars <- function(test_and_training) {
 result <- vector(mode = "numeric",
 length = length(test_and_training))
 for (i in seq_along(test_and_training)) {
 training <- test_and_training[[i]]$training
 test <- test_and_training[[i]]$test
 model <- training %>% lm(dist ~ speed, data = .)
 predictions <- test %>% predict(model, data = .)
 targets <- test$dist
 result[i] <- rmse(targets, predictions)
 }
 result
}

cars %>%permute_rows %>%group_data(5) %>% cross_validation_split %>%prediction_accuracy_cars
## [1] 242.8901 190.2690 155.4522 246.3786 182.8511
random_group <- function(n, probs) {
 probs <- probs / sum(probs)
 g <- findInterval(seq(0, 1, length = n), c(0, cumsum(probs)),
 rightmost.closed = TRUE)
 names(probs)[sample(g)]
}

random_group(8, c(training = 0.5, test = 0.5))
## [1] "training" "training" "test"     "training" "test"     "training" "test"    
## [8] "test"
random_group(8, c(training = 0.6, test = 0.4))
## [1] "test"     "training" "training" "training" "test"     "test"     "training"
## [8] "training"
random_group(8, c(training = 0.8, test = 0.2)) 
## [1] "training" "training" "training" "test"     "training" "training" "test"    
## [8] "training"
partition <- function(df, n, probs) {
 replicate(n, split(df, random_group(nrow(df), probs)), FALSE)
}

random_cars <- cars %>% partition(4, c(training = 0.5, test = 0.5))

random_cars %>% prediction_accuracy_cars 
## [1] 63.03685 84.98622 73.82351 87.96673
### Decision Tree
library(rpart)

model <- cars %>% rpart(dist ~ speed, data = .)
rmse(predict(model, cars), cars$dist) # root mean square
## [1] 117.1626
model <- BreastCancer %>%
 rpart(Class ~ Cl.thickness, data = .) 
predict(model, BreastCancer) %>% head 
##       benign malignant
## 1 0.82815356 0.1718464
## 2 0.82815356 0.1718464
## 3 0.82815356 0.1718464
## 4 0.82815356 0.1718464
## 5 0.82815356 0.1718464
## 6 0.03289474 0.9671053
predicted_class <- predict(model, BreastCancer) %>%
 as.data.frame %$% ifelse(benign > 0.5, "benign", "malignant")
 
table(BreastCancer$Class, predicted_class)
##            predicted_class
##             benign malignant
##   benign       453         5
##   malignant     94       147
accuracy = sum(453,147) / sum(453,147,5,94)
accuracy
## [1] 0.8583691
#install.packages("party")
library(party)
model <- cars %>% ctree(dist ~ speed, data = .)
rmse(predict(model, cars), cars$dist) 
## [1] 117.1626
model <- BreastCancer %>%
 ctree(Class ~ Cl.thickness, data = .) 

predict(model, BreastCancer) %>% head 
## [1] benign    benign    benign    benign    benign    malignant
## Levels: benign malignant
table(BreastCancer$Class, predict(model, BreastCancer)) 
##            
##             benign malignant
##   benign       453         5
##   malignant     94       147
cars %>% ctree(dist ~ speed, data = .) %>% plot

### Random Forests
library(randomForest)
model <- cars %>% randomForest(dist ~ speed, data = .)
rmse(predict(model, cars), cars$dist) # root mean square error
## [1] 83.52702
model <- BreastCancer %>% randomForest(Class ~ Cl.thickness, data = .)
 
predict(model, BreastCancer) %>% head 
##         1         2         3         4         5         6 
##    benign    benign    benign malignant    benign malignant 
## Levels: benign malignant
table(BreastCancer$Class, predict(model, BreastCancer))
##            
##             benign malignant
##   benign       437        21
##   malignant     76       165
accuracy = sum(437,165) / sum(437,165,76,21)
accuracy
## [1] 0.8612303
### Neural Networks
library(nnet)
model <- cars %>% nnet(dist ~ speed, data = ., size = 5)
## # weights:  16
## initial  value 121964.440575 
## final  value 120655.000000 
## converged
rmse(predict(model, cars), cars$dist)
## [1] 347.3543
model <- BreastCancer %>% nnet(Class ~ Cl.thickness, data = ., size = 5)
## # weights:  56
## initial  value 456.304615 
## iter  10 value 228.635702
## iter  20 value 225.141400
## iter  30 value 225.098834
## iter  40 value 225.098489
## iter  50 value 225.098398
## final  value 225.098360 
## converged
predict(model, BreastCancer) %>% head 
##        [,1]
## 1 0.3461856
## 2 0.3461856
## 3 0.1111388
## 4 0.5294387
## 5 0.1499583
## 6 0.9130318
predicted_class <- predict(model, BreastCancer) %>%
 { ifelse(. < 0.5, "benign", "malignant") }
 
table(BreastCancer$Class, predicted_class)
##            predicted_class
##             benign malignant
##   benign       437        21
##   malignant     76       165
accuracy = sum(437,165) / sum(437,165,76,21)
accuracy
## [1] 0.8612303
### Support Vector Machines
#install.packages("kernlab")
library(kernlab)

model <- cars %>% ksvm(dist ~ speed, data = .)
rmse(predict(model, cars), cars$dist)
## [1] 100.4652
model <- BreastCancer %>% ksvm(Class ~ Cl.thickness, data = .)

predict(model, BreastCancer) %>% head 
## [1] benign    benign    benign    malignant benign    malignant
## Levels: benign malignant
table(BreastCancer$Class, predict(model, BreastCancer))
##            
##             benign malignant
##   benign       437        21
##   malignant     76       165
### Naive Bayes
library(e1071)

model <- BreastCancer %>% naiveBayes(Class ~ Cl.thickness, data = .)

predict(model, BreastCancer) %>% head
## [1] benign    benign    benign    malignant benign    malignant
## Levels: benign malignant
table(BreastCancer$Class, predict(model, BreastCancer))
##            
##             benign malignant
##   benign       437        21
##   malignant     76       165
## **Chapter Seven** 
### **Unsupervised Learning**
### Principal Component Analysis
iris %>% head()
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
#iris %>% ggplot() + geom_point(aes(x = Sepal.Length, y = Sepal.Width, colour = Species))

#iris %>% ggplot() + geom_point(aes(x = Petal.Length, y = Petal.Width, colour = Species))

pca <- iris %>% select(-Species) %>% prcomp
pca
## Standard deviations (1, .., p=4):
## [1] 2.0562689 0.4926162 0.2796596 0.1543862
## 
## Rotation (n x k) = (4 x 4):
##                      PC1         PC2         PC3        PC4
## Sepal.Length  0.36138659 -0.65658877  0.58202985  0.3154872
## Sepal.Width  -0.08452251 -0.73016143 -0.59791083 -0.3197231
## Petal.Length  0.85667061  0.17337266 -0.07623608 -0.4798390
## Petal.Width   0.35828920  0.07548102 -0.54583143  0.7536574
pca %>% plot

#?prcomp
mapped_iris <- pca %>% predict(iris)
mapped_iris %>% head
##            PC1        PC2         PC3          PC4
## [1,] -2.684126 -0.3193972  0.02791483  0.002262437
## [2,] -2.714142  0.1770012  0.21046427  0.099026550
## [3,] -2.888991  0.1449494 -0.01790026  0.019968390
## [4,] -2.745343  0.3182990 -0.03155937 -0.075575817
## [5,] -2.728717 -0.3267545 -0.09007924 -0.061258593
## [6,] -2.280860 -0.7413304 -0.16867766 -0.024200858
mapped_iris %>%
 as.data.frame %>%
 cbind(Species = iris$Species) %>%
 ggplot() + geom_point(aes(x = PC1, y = PC2, colour = Species))

data(HouseVotes84)
HouseVotes84 %>% head(na.rm = TRUE)
##        Class   V1 V2 V3   V4   V5 V6 V7 V8 V9 V10  V11  V12 V13 V14 V15  V16
## 1 republican    n  y  n    y    y  y  n  n  n   y <NA>    y   y   y   n    y
## 2 republican    n  y  n    y    y  y  n  n  n   n    n    y   y   y   n <NA>
## 3   democrat <NA>  y  y <NA>    y  y  n  n  n   n    y    n   y   y   n    n
## 4   democrat    n  y  y    n <NA>  y  n  n  n   n    y    n   y   n   n    y
## 5   democrat    y  y  y    n    y  y  n  n  n   n    y <NA>   y   y   y    y
## 6   democrat    n  y  y    n    y  y  n  n  n   n    n    n   y   y   y    y
vote_patterns <- HouseVotes84 %>% select(-Class) %>% apply(c(1,2), . %>% { ifelse(as.character(.) == "n", 0, 1) }) %>% apply(c(1,2), . %>% { ifelse(is.na(.), 0.5, .) })

pca <- vote_patterns %>% prcomp
#pca %>% head
pca %>% plot

mapped_votes <- pca %>% predict(vote_patterns)

mapped_votes %>%as.data.frame %>% cbind(Class = HouseVotes84$Class) %>% ggplot() + geom_point(aes(x = PC1, y = PC2, colour = Class))

### Multidimensional Scaling 
iris_dist <- iris %>% select(-Species) %>% dist

mds_iris <- iris_dist %>% cmdscale(k = 2)

mds_iris %>% head
##           [,1]       [,2]
## [1,] -2.684126  0.3193972
## [2,] -2.714142 -0.1770012
## [3,] -2.888991 -0.1449494
## [4,] -2.745343 -0.3182990
## [5,] -2.728717  0.3267545
## [6,] -2.280860  0.7413304
mds_iris %>% as.data.frame %>%
 cbind(Species = iris$Species) %>% ggplot() +
 geom_point(aes(x = V1, y = V2, colour = Species))

mds_votes <- vote_patterns %>% dist %>% cmdscale(k = 2 )

mds_votes %>% as.data.frame %>% cbind(Class = HouseVotes84$Class) %>% ggplot() + geom_point(aes(x = V1, y = V2, colour = Class))

random_ngram <- function(n)
 sample(c('C','H','A','R','L','E','S'), size = n, replace = TRUE) %>% paste0(collapse = "")

random_string <- function(m) {
 n <- max(1, m + sample(c(-1,1), size = 1) * rgeom(1, 1/2))
 random_ngram(n)
}

strings <- replicate(10, random_string(5))
#install.packages("stringdist")
library(stringdist)
string_dist <- stringdistmatrix(strings)
string_dist %>% cmdscale(k = 4) %>% as.data.frame %>%
 cbind(String = strings) %>% ggplot(aes(x = V1, y = V2)) +
 geom_point() + geom_label(aes(label = String), hjust = 0, nudge_y = -0.1)

### Kmeans
clusters <- iris %>% select(-Species) %>% kmeans(centers = 3)

clusters$centers
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.006000    3.428000     1.462000    0.246000
## 2     5.901613    2.748387     4.393548    1.433871
## 3     6.850000    3.073684     5.742105    2.071053
#clusters$cluster %>% head

clusters$cluster %>% table
## .
##  1  2  3 
## 50 62 38
iris %>% cbind(Cluster = clusters$cluster) %>%
 ggplot() + geom_bar(aes(x = Species, fill = as.factor(Cluster)), position = "dodge") + scale_fill_discrete("Cluster")

pca <- iris %>% select(-Species) %>% prcomp
 
mapped_iris <- pca %>% predict(iris)
 
mapped_centers <- pca %>% predict(clusters$centers)

mapped_iris %>%
 as.data.frame %>%
 cbind(Species = iris$Species,
 Clusters = as.factor(clusters$cluster)) %>%
 ggplot() +
 geom_point(aes(x = PC1, y = PC2,
 colour = Species, shape = Clusters)) +
 geom_point(aes(x = PC1, y = PC2),
 size = 5, shape = "X",
 data = as.data.frame(mapped_centers))

table(iris$Species, clusters$cluster)
##             
##               1  2  3
##   setosa     50  0  0
##   versicolor  0 48  2
##   virginica   0 14 36
tbl <- table(iris$Species, clusters$cluster)
(counts <- apply(tbl, 1, which.max))
##     setosa versicolor  virginica 
##          1          2          3
tbl <- table(iris$Species, clusters$cluster)
(counts <- apply(tbl, 1, which.min))
##     setosa versicolor  virginica 
##          2          1          1
map <- rep(NA, each = 3)
map[counts] <- names(counts)
table(iris$Species, map[clusters$cluster])
##             
##              setosa virginica
##   setosa          0        50
##   versicolor     48         0
##   virginica      14         0
#?prcomp
## Hierarchical Clustering
iris_dist <- iris %>% select(-Species) %>% scale %>% dist

clustering <- hclust(iris_dist)

plot(clustering)

library(ggdendro)
ggdendrogram(clustering) + theme_dendro()

clusters <- clustering %>% cutree(k = 2)

iris %>% cbind(Cluster = clusters) %>%
 ggplot() + geom_bar(aes(x = Species, fill = as.factor(Cluster)), position = "dodge") +
 scale_fill_discrete("Cluster")

mapped_iris %>% as.data.frame %>%
 cbind(Species = iris$Species,
 Clusters = as.factor(clusters)) %>%
 ggplot() + geom_point(aes(x = PC1, y = PC2,
 shape = Species, colour = Clusters))

## Association Rules
library(arules)

data(income)
income %>% head
##            INCOME SEX MARITAL.STATUS   AGE               EDUCATION
## 1        [75.000-   F        Married 45-54 1 to 3 years of college
## 2        [75.000-   M        Married 45-54        College graduate
## 3        [75.000-   F        Married 25-34        College graduate
## 4        -10.000)   F         Single 14-17          Grades 9 to 11
## 5        -10.000)   F         Single 14-17          Grades 9 to 11
## 6 [50.000-75.000)   M        Married 55-64 1 to 3 years of college
##                OCCUPATION      AREA DUAL.INCOMES HOUSEHOLD.SIZE UNDER18
## 1               Homemaker 10+ years           No          Three    None
## 2               Homemaker 10+ years           No           Five     Two
## 3 Professional/Managerial 10+ years          Yes          Three     One
## 4  Student, HS or College 10+ years  Not Married           Four     Two
## 5  Student, HS or College 4-6 years  Not Married           Four     Two
## 6                 Retired 10+ years           No            Two    None
##   HOUSEHOLDER HOME.TYPE ETHNIC.CLASS LANGUAGE
## 1         Own     House        White     <NA>
## 2         Own     House        White  English
## 3        Rent Apartment        White  English
## 4      Family     House        White  English
## 5      Family     House        White  English
## 6         Own     House        White  English
data(Income)
Income %>% head
## transactions in sparse format with
##  6 transactions (rows) and
##  50 items (columns)
rules <- income %>% apriori
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5     0.1      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 899 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[84 item(s), 8993 transaction(s)] done [0.01s].
## sorting and recoding items ... [42 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 done [0.04s].
## writing ... [785 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
rules %>% head %>% inspect(linebreak = FALSE)
##     lhs                                     rhs                support  
## [1] {}                                   => {LANGUAGE=English} 0.8666741
## [2] {EDUCATION=Grad Study}               => {LANGUAGE=English} 0.1000778
## [3] {OCCUPATION=Clerical/Service Worker} => {LANGUAGE=English} 0.1046369
## [4] {INCOME=[30.000-40.000)}             => {LANGUAGE=English} 0.1111976
## [5] {UNDER18=Two}                        => {LANGUAGE=English} 0.1073057
## [6] {INCOME=[50.000-75.000)}             => {LANGUAGE=English} 0.1329923
##     confidence coverage  lift      count
## [1] 0.8666741  1.0000000 1.0000000 7794 
## [2] 0.9316770  0.1074169 1.0750027  900 
## [3] 0.8860640  0.1180918 1.0223728  941 
## [4] 0.9009009  0.1234293 1.0394921 1000 
## [5] 0.8405923  0.1276548 0.9699059  965 
## [6] 0.9143731  0.1454465 1.0550368 1196
rules %>% sort(by = "lift") %>% head %>% inspect(linebreak = FALSE)
##     lhs                                                                           
## [1] {MARITAL.STATUS=Married, OCCUPATION=Professional/Managerial, LANGUAGE=English}
## [2] {MARITAL.STATUS=Married, OCCUPATION=Professional/Managerial}                  
## [3] {DUAL.INCOMES=No, HOUSEHOLDER=Own}                                            
## [4] {AREA=10+ years, DUAL.INCOMES=Yes, HOME.TYPE=House}                           
## [5] {DUAL.INCOMES=Yes, HOUSEHOLDER=Own, HOME.TYPE=House, LANGUAGE=English}        
## [6] {DUAL.INCOMES=Yes, HOUSEHOLDER=Own, HOME.TYPE=House}                          
##        rhs                      support   confidence coverage  lift     count
## [1] => {DUAL.INCOMES=Yes}       0.1091960 0.8069022  0.1353275 3.281986  982 
## [2] => {DUAL.INCOMES=Yes}       0.1176471 0.8033409  0.1464472 3.267501 1058 
## [3] => {MARITAL.STATUS=Married} 0.1016346 0.9713071  0.1046369 2.619965  914 
## [4] => {MARITAL.STATUS=Married} 0.1003002 0.9605964  0.1044145 2.591075  902 
## [5] => {MARITAL.STATUS=Married} 0.1098632 0.9601555  0.1144223 2.589886  988 
## [6] => {MARITAL.STATUS=Married} 0.1209830 0.9594356  0.1260981 2.587944 1088
rules %>% subset(support > 0.5) %>% sort(by = "lift") %>%
 head %>% inspect(linebreak = FALSE)
##     lhs                           rhs                support   confidence
## [1] {ETHNIC.CLASS=White}       => {LANGUAGE=English} 0.6110308 0.9456204 
## [2] {AREA=10+ years}           => {LANGUAGE=English} 0.5098410 0.8847935 
## [3] {UNDER18=None}             => {LANGUAGE=English} 0.5609919 0.8813767 
## [4] {}                         => {LANGUAGE=English} 0.8666741 0.8666741 
## [5] {DUAL.INCOMES=Not Married} => {LANGUAGE=English} 0.5207384 0.8611622 
##     coverage  lift      count
## [1] 0.6461692 1.0910911 5495 
## [2] 0.5762260 1.0209069 4585 
## [3] 0.6364951 1.0169644 5045 
## [4] 1.0000000 1.0000000 7794 
## [5] 0.6046925 0.9936402 4683
# **Chapter 8**
## **More on R Programming**

3 %% 4
## [1] 3
3 %/% 4
## [1] 0
3/4
## [1] 0.75
1 : 2 ^ 4
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16
-1 : 2
## [1] -1  0  1  2
!TRUE
## [1] FALSE
!FALSE
## [1] TRUE
x <- c(TRUE, FALSE, TRUE, FALSE)

y <- c(TRUE, TRUE, FALSE, FALSE)

x | y
## [1]  TRUE  TRUE  TRUE FALSE
FALSE | TRUE
## [1] TRUE
FALSE || TRUE
## [1] TRUE
is.numeric(2)
## [1] TRUE
x <- as.integer(2)

is.integer(x)
## [1] TRUE
as.integer(3.2)
## [1] 3
as.character(9.99999)
## [1] "9.99999"
sqrt(as.complex(1))
## [1] 1+0i
x <- 5 > 4
x
## [1] TRUE
class(x)
## [1] "logical"
is.logical(x)
## [1] TRUE
x <- "hello, world"
class(x)
## [1] "character"
is.character(x)
## [1] TRUE
as.character(3.14) 
## [1] "3.14"
rep("foo", 10)
##  [1] "foo" "foo" "foo" "foo" "foo" "foo" "foo" "foo" "foo" "foo"
v <- 1:3
is.atomic(v)
## [1] TRUE
v <- 1:3
is.vector(v)
## [1] TRUE
attr(v, "foo") <- "bar"
v
## [1] 1 2 3
## attr(,"foo")
## [1] "bar"
is.vector(v)
## [1] FALSE
c(1, 2, c(3, 4), c(5, 6, 7))
## [1] 1 2 3 4 5 6 7
c(1, 2, 3, "foo") -> vec
vec
## [1] "1"   "2"   "3"   "foo"
is.vector(vec)
## [1] TRUE
is.character(vec)
## [1] TRUE
v <- 1:6

attributes(v)
## NULL
dim(v) <- c(2, 3)

attributes(v)
## $dim
## [1] 2 3
dim(v)
## [1] 2 3
v
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
v <- 1:6
matrix(data = v, nrow = 2, ncol = 3, byrow = FALSE)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
matrix(data = v, nrow = 2, ncol = 3, byrow = TRUE)
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
(A <- matrix(1:4, nrow = 2))
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
(B <- matrix(5:8, nrow = 2))
##      [,1] [,2]
## [1,]    5    7
## [2,]    6    8
A * B # element-wise mult
##      [,1] [,2]
## [1,]    5   21
## [2,]   12   32
A %*% B
##      [,1] [,2]
## [1,]   23   31
## [2,]   34   46
t(A)
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
solve(A) ### inverse
##      [,1] [,2]
## [1,]   -2  1.5
## [2,]    1 -0.5
solve(A) %*% A ### identity matrix
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
list(1:3, 5:8)
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] 5 6 7 8
list(1:3, c(TRUE, FALSE))
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1]  TRUE FALSE
list((1:4), (5:8), (9:12))
## [[1]]
## [1] 1 2 3 4
## 
## [[2]]
## [1] 5 6 7 8
## 
## [[3]]
## [1]  9 10 11 12
unlist(list(1:4, 5:7))
## [1] 1 2 3 4 5 6 7
#?'%%'
#?`[[`
v <- 1:4
v[2]
## [1] 2
v[2:3]
## [1] 2 3
v[c(1,1,4,3,2)]
## [1] 1 1 4 3 2
v[-1]
## [1] 2 3 4
v[-(1:2)]
## [1] 3 4
v[v %% 2 == 0]
## [1] 2 4
v[v %% 2 == 0] <- 13
v
## [1]  1 13  3 13
m <- matrix(1:6, nrow = 2, byrow = TRUE)
m
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
m[1,]
## [1] 1 2 3
m[,1]
## [1] 1 4
m[1:2,1:2]
##      [,1] [,2]
## [1,]    1    2
## [2,]    4    5
m[1,,drop = FALSE]
##      [,1] [,2] [,3]
## [1,]    1    2    3
m[,1,drop = FALSE]
##      [,1]
## [1,]    1
## [2,]    4
L <- list(1,2,3)
L[1]
## [[1]]
## [1] 1
L[2:3]
## [[1]]
## [1] 2
## 
## [[2]]
## [1] 3
L[[1]]
## [1] 1
L[[2]]
## [1] 2
L[[3]]
## [1] 3
### Named Values
v <- c(a = 1, b = 2, c = 3, d = 4)
v
## a b c d 
## 1 2 3 4
L <- list(a = 1:5, b = c(TRUE, FALSE))
L
## $a
## [1] 1 2 3 4 5
## 
## $b
## [1]  TRUE FALSE
names(v) <- LETTERS[1:4]
v
## A B C D 
## 1 2 3 4
v["A"]
## A 
## 1
L["a"]
## $a
## [1] 1 2 3 4 5
L[["a"]]
## [1] 1 2 3 4 5
L$a
## [1] 1 2 3 4 5
### Control Structures
if (10 < 12) {
 print("Yes")
} else {
 print("No")
}
## [1] "Yes"
x = 100
if (x < 0) {
 print("Negative")
} else if (x > 0) {
 print("Positive")
} else {
 print("Nothing")
}
## [1] "Positive"
if (x > 0) "positive" else if (x < 0) "negative" else "zero"
## [1] "positive"
if (x > 0) {
 print("positive")
}else if (x < 0){
 print("negative")
}else{
 print("zero")
}
## [1] "positive"
for (i in 1:10) {
 print(i * i)
}
## [1] 1
## [1] 4
## [1] 9
## [1] 16
## [1] 25
## [1] 36
## [1] 49
## [1] 64
## [1] 81
## [1] 100
x <- c("foo", "bar", "baz")
for (i in seq_along(x)) {
 print(i)
 print(x[i])
}
## [1] 1
## [1] "foo"
## [1] 2
## [1] "bar"
## [1] 3
## [1] "baz"
for (i in seq_along(x)) {
 if (i %% 2 == 0) {
 next
 }
 print(x[i])
}
## [1] "foo"
## [1] "baz"
for (i in 1:100) {
 if (i %% 2 == 0) {
 next
 }
 if (i > 20) {
 break
 }
 print(i)
}
## [1] 1
## [1] 3
## [1] 5
## [1] 7
## [1] 9
## [1] 11
## [1] 13
## [1] 15
## [1] 17
## [1] 19
i <- 1
while (i < 5) {
 i <- i + 1
 print(i)
}
## [1] 2
## [1] 3
## [1] 4
## [1] 5
i <- 1
repeat {
 print(i)
 i <- i + 1
 if (i > 5) break
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
### Function
f <- function() {
 print("hello, world")
 (5 + 4) + (4 ** 2)
}
f()
## [1] "hello, world"
## [1] 25
plus <- function(x, y) {
 print(paste(x, "+", y, "is", x + y))
 x + y
}
plus(2, 2)
## [1] "2 + 2 is 4"
## [1] 4
div <- function(x, y) {
 print(paste(x, "/", y, "is", x / y))
 x / y}

div (6,2)
## [1] "6 / 2 is 3"
## [1] 3
div(3,4)
## [1] "3 / 4 is 0.75"
## [1] 0.75
div(y = 2, x = 6)
## [1] "6 / 2 is 3"
## [1] 3
pow <- function(x, y = 2) x^y
pow(2)
## [1] 4
pow(3)
## [1] 9
pow(4)
## [1] 16
pow(8)
## [1] 64
pow(9,2)
## [1] 81
pow(8,0)
## [1] 1
safer_div <- function(x, y) {
 if (y == 0) {
 NA
 } else {
 x / y
 }
}
safer_div(2,4)
## [1] 0.5
safer_div(3,9)
## [1] 0.3333333
safer_div <- function(x, y) {
 if (y == 0) {
 return(NA)
 }
 x / y
}
safer_div(3,6)
## [1] 0.5
safer_div(1,0)
## [1] NA
f <- function(x, y = x^2) y + x
f(7)
## [1] 56
g <- function(x, y = x^2) { x <- 0; y + x }
g(2)
## [1] 0
g(2,9)
## [1] 9
h <- function(x, y = x^2) { y; x <- 0; y + x }
h(2)
## [1] 4
h(8)
## [1] 64
### Scoping
x <- "x"
f <- function(y) {
 g <- function() c(x, y)
 g()
}
f("y")
## [1] "x" "y"
x <- "x"
f <- function(y) {
 g <- function() c(x, y)
 y <- "z"
 g()
}
f("y")
## [1] "x" "z"
x <- "x"
f <- function(y) {
 g <- function() c(x, y)
 g
}
g <- f("y")
g()
## [1] "x" "y"
x <- "x"
f <- function(y) {
 g <- function() c(x, y)
 g
}
g <- f("u")
g()
## [1] "x" "u"
h <- f("z")
h()
## [1] "x" "z"
f <- function() {
 x <- NULL
 set <- function(val) { x <- val }
 get <- function() x
 list(set = set, get = get)
}
x <- f()
x$get()
## NULL
x$set(5)
x$get()
## NULL
f <- function(){
x <- NULL
 set <- function(val) { x <<- val }
 get <- function() x
 list(set = set, get = get)
}
x <- f()
x$get()
## NULL
x$set(5)
x$get()
## [1] 5
n <- function(x) x
f <- function(n) n(n)
f(5)
## [1] 5
f(function(x) 15)
## [1] 15
factorial <- function(n) {
 if (n == 1) {
 1
 } else {
 n * factorial(n - 1)
 }
}
factorial(9)
## [1] 362880
factorial(1)
## [1] 1
merge <- function(x, y) {
 if (length(x) == 0) return(y)
 if (length(y) == 0) return(x)
 if (x[1] < y[1]) {
 c(x[1], merge(x[-1], y))
 } else {
 c(y[1], merge(x, y[-1]))
 }
}
merge(9,0)
## [1] 0 9
merge(12,5)
## [1]  5 12
merge_sort <- function(x) {
 if (length(x) < 2) return(x)
 n <- length(x)
 m <- n %/% 2
 merge(merge_sort(x[1:m]), merge_sort(x[(m+1):n]))}
merge_sort (10)
## [1] 10
# **Chapter 9**
### **Advance R Programming**
(x <- 2 / 3)
## [1] 0.6666667
(y <- x ** 2)
## [1] 0.4444444
(x <- 1:4 / 3)
## [1] 0.3333333 0.6666667 1.0000000 1.3333333
(y <- x ** 2)
## [1] 0.1111111 0.4444444 1.0000000 1.7777778
x <- 1:5
y <- 6:10
(z <- x + y)
## [1]  7  9 11 13 15
(s = y -  x)
## [1] 5 5 5 5 5
z <- vector(length = length(x))
for (i in seq_along(x)) {
 z[i] <- x[i] + y[i]
}
z
## [1]  7  9 11 13 15
s = vector(length = length(x))
for (i in seq_along(x)){
  s[i] = y[i] - x[i]
}
s
## [1] 5 5 5 5 5
sqrt((1:5)**2)
## [1] 1 2 3 4 5
sin(sqrt((1:5)**2))
## [1]  0.8414710  0.9092974  0.1411200 -0.7568025 -0.9589243
x <- 1:10
y <- 1:2
x + y
##  [1]  2  4  4  6  6  8  8 10 10 12
x <- 1:10
ifelse(x %% 2 == 0, 5, 15)
##  [1] 15  5 15  5 15  5 15  5 15  5
f <- function(x, y) sqrt(x ** y)
f(1:6, 2)
## [1] 1 2 3 4 5 6
f(1:6, 1:2)
## [1] 1.000000 2.000000 1.732051 4.000000 2.236068 6.000000
role_table <- list("Thomas" = "Instructor",
 "Henrik" = "Student",
 "Kristian" = "Student",
 "Randi" = "Student",
 "Heidi" = "Student",
 "Manfred" = "Student",
 "Charles" = "Researcher")

map_to_role <- function(name) role_table[[name]]

map_to_role("Thomas")
## [1] "Instructor"
map_to_role("Charles")
## [1] "Researcher"
role_table[c("Thomas", "Henrik", "Randi","Charles")]
## $Thomas
## [1] "Instructor"
## 
## $Henrik
## [1] "Student"
## 
## $Randi
## [1] "Student"
## 
## $Charles
## [1] "Researcher"
map_to_role_2 <- function(names) unlist(role_table[names])
x <- c("Thomas", "Henrik", "Randi","Charles")
map_to_role_2(x)
##       Thomas       Henrik        Randi      Charles 
## "Instructor"    "Student"    "Student" "Researcher"
x <- list("first" = list("second" = "foo"), "third" = "bar")
x[[c("first", "second")]]
## [1] "foo"
#x[["first"]][["second"]]
### apply
m <- matrix(1:6, nrow = 2, byrow = TRUE)
m
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
apply(m, 1, function(x) paste(x, collapse = ":")) # row-wise
## [1] "1:2:3" "4:5:6"
apply(m, 2, function(x) paste(x, collapse = ":")) # column-wise
## [1] "1:4" "2:5" "3:6"
apply(m, c(1,2), function(x) paste(x, collapse = ":")) # Both
##      [,1] [,2] [,3]
## [1,] "1"  "2"  "3" 
## [2,] "4"  "5"  "6"
apply(m, 1, function(x) c(x,x))
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
## [4,]    1    4
## [5,]    2    5
## [6,]    3    6
apply(m, 2, function(x) c(x,x))
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    1    2    3
## [4,]    4    5    6
apply(m, c(1,2), function(x) c(x,x))
## , , 1
## 
##      [,1] [,2]
## [1,]    1    4
## [2,]    1    4
## 
## , , 2
## 
##      [,1] [,2]
## [1,]    2    5
## [2,]    2    5
## 
## , , 3
## 
##      [,1] [,2]
## [1,]    3    6
## [2,]    3    6
x <- apply(m, c(1,2), function(x) c(x,x))
k <- dim(x)[3]
n <- dim(x)[2]
for (i in 1:n) {
 for (j in 1:k) {
 print(x[,i,j])
 }
}
## [1] 1 1
## [1] 2 2
## [1] 3 3
## [1] 4 4
## [1] 5 5
## [1] 6 6
sumpow <- function(x, n) sum(x) ** n
apply(m, 1, sumpow,n = 2) # row - wise
## [1]  36 225
apply(m, 2, sumpow, n = 2) # column - wise
## [1] 25 49 81
### lapply
l <- list(1, 2, 3)
lapply(l, function(x) x**2)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 9
l <- list(a = 1, b = 2, c = 3)
lapply(l, function(x) x**2)
## $a
## [1] 1
## 
## $b
## [1] 4
## 
## $c
## [1] 9
lapply(1:5, function(x) x**4)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 16
## 
## [[3]]
## [1] 81
## 
## [[4]]
## [1] 256
## 
## [[5]]
## [1] 625
lapply(list(a = 1:3, b = 4:6), function(x) x**2)
## $a
## [1] 1 4 9
## 
## $b
## [1] 16 25 36
### sapply
sapply(1:3, function(x) x**2) # returns vector by default 
## [1] 1 4 9
vapply(1:3, function(x) x**2,1)
## [1] 1 4 9
#?vapply
### Advance Functions

`+`(2, 2)
## [1] 4
v <- 1:4
names(v) <- c("a", "b", "c", "d")
v
## a b c d 
## 1 2 3 4
`foo<-` <- function(x, value) {
 x$foo <- value
 x
}

`bar<-` <- function(x, value) {
 x$bar <- value
 x
}

x <- list(foo = 1, bar = 2)
x$foo 
## [1] 1
x$bar
## [1] 2
foo(x) <- 4
x$foo
## [1] 4
bar(x) <- 3
x$bar
## [1] 3
y <- x
foo(x) <- 5
x
## $foo
## [1] 5
## 
## $bar
## [1] 3
y
## $foo
## [1] 4
## 
## $bar
## [1] 3
`foo<-` <- function(y, value) {
 y$foo <- value
 y
}

x <- list(foo = 1, bar = 2)
foo(x) <- 3
x$foo
## [1] 3
`modify<-` <- function(x, variable, value) {
 x[variable] <- value
 x
}

x <- list(foo = 1, bar = 2)
modify(x, "foo") <- 5
modify(x, "bar") <- 7
x$foo
## [1] 5
x$bar
## [1] 7
x <- 1:4
f <- function(x) {
 x[2] <- 5 # insert 5 at index 2
 x[1] <- 9 # insert 9 at index 1 
 x[4] <- 10 # insert 10 at index 4
 x
}
x
## [1] 1 2 3 4
f(x)
## [1]  9  5  3 10
square <- function(x) (x**2 - x)
square(5)
## [1] 20
m <- matrix(1:6, nrow = 3)
m
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
sum_of_squares <- function(x) sum(x^2)
apply(m, 1, sum_of_squares) # row - wise
## [1] 17 29 45
apply(m,2 , sum_of_squares) # column - wise
## [1] 14 77
#apply(m^2, 1, sum)
apply_if <- function(x, p, f) {
 result <- vector(length = length(x))
 n <- 0
 for (i in seq_along(x)) {
 if (p(x[i])) {
 n <- n + 1
 result[n] <- f(x[i])
 }
 }
 head(result, n)
}
apply_if(1:8, function(x) x %% 2 == 0, function(x) x^2)
## [1]  4 16 36 64
power <- function(n) function(x) x**n
square <- power(2)
cube <- power(3)

x <- 1:4
square(x) 
## [1]  1  4  9 16
cube(x)
## [1]  1  8 27 64
### Filter, Map and Reduce
is_even <- function(x) x %% 2 == 0
Filter(is_even, 1:8)
## [1] 2 4 6 8
Filter(is_even, unlist(1:8))
## [1] 2 4 6 8
Filter(is_even, as.list(1:8))
## [[1]]
## [1] 2
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 6
## 
## [[4]]
## [1] 8
divi <- function(x) (x %% 3 == 0)
Filter(divi, 1:12)
## [1]  3  6  9 12
square <- function(x) x^2
Map(square, 1:4)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 9
## 
## [[4]]
## [1] 16
unlist(Map(square, 1:4))
## [1]  1  4  9 16
plus <- function(x, y) x + y
unlist(Map(plus, 0:3, 3:0))
## [1] 3 3 3 3
add_parenthesis <- function(a, b) paste("(", a, ", ", b, ")", sep = "")
Reduce(add_parenthesis, 1:4)
## [1] "(((1, 2), 3), 4)"
mysum <- function(x) Reduce(`+`, x)
mysum(1:4)
## [1] 10
### Function Operations
cached <- function(f) {
 force(f)
 table <- list()
 
function(n) {
 key <- as.character(n)
 if (key %in% names(table)) {
 print(paste("I have already computed the value for", n))
 table[[key]]
 } else {
 print(paste("Going to compute the value for", n))
 res <- f(n)
 print(paste("That turned out to be", res))
 table[key] <<- res
 print(table)
 res
 }
 }
}

factorial <- function(n) {
 if (n == 1) {
 1
 } else {
 n * factorial(n - 1)
 }
}

factorial <- cached(factorial)

factorial(4)
## [1] "Going to compute the value for 4"
## [1] "Going to compute the value for 3"
## [1] "Going to compute the value for 2"
## [1] "Going to compute the value for 1"
## [1] "That turned out to be 1"
## $`1`
## [1] 1
## 
## [1] "That turned out to be 2"
## $`1`
## [1] 1
## 
## $`2`
## [1] 2
## 
## [1] "That turned out to be 6"
## $`1`
## [1] 1
## 
## $`2`
## [1] 2
## 
## $`3`
## [1] 6
## 
## [1] "That turned out to be 24"
## $`1`
## [1] 1
## 
## $`2`
## [1] 2
## 
## $`3`
## [1] 6
## 
## $`4`
## [1] 24
## [1] 24
factorial(1)
## [1] "I have already computed the value for 1"
## [1] 1
factorial(2)
## [1] "I have already computed the value for 2"
## [1] 2
factorial(3)
## [1] "I have already computed the value for 3"
## [1] 6
factorial(4)
## [1] "I have already computed the value for 4"
## [1] 24
factorial(5)
## [1] "Going to compute the value for 5"
## [1] "I have already computed the value for 4"
## [1] "That turned out to be 120"
## $`1`
## [1] 1
## 
## $`2`
## [1] 2
## 
## $`3`
## [1] 6
## 
## $`4`
## [1] 24
## 
## $`5`
## [1] 120
## [1] 120
fibonacci <- function(n) {
 if (n == 1 || n == 2) {
 1
 } else {
 fibonacci(n - 1) + fibonacci(n - 2)
 }
}

fibonacci <- cached(fibonacci)

fibonacci(4)
## [1] "Going to compute the value for 4"
## [1] "Going to compute the value for 3"
## [1] "Going to compute the value for 2"
## [1] "That turned out to be 1"
## $`2`
## [1] 1
## 
## [1] "Going to compute the value for 1"
## [1] "That turned out to be 1"
## $`2`
## [1] 1
## 
## $`1`
## [1] 1
## 
## [1] "That turned out to be 2"
## $`2`
## [1] 1
## 
## $`1`
## [1] 1
## 
## $`3`
## [1] 2
## 
## [1] "I have already computed the value for 2"
## [1] "That turned out to be 3"
## $`2`
## [1] 1
## 
## $`1`
## [1] 1
## 
## $`3`
## [1] 2
## 
## $`4`
## [1] 3
## [1] 3
fibonacci(1)
## [1] "I have already computed the value for 1"
## [1] 1
fibonacci(2)
## [1] "I have already computed the value for 2"
## [1] 1
fibonacci(3)
## [1] "I have already computed the value for 3"
## [1] 2
fibonacci(4)
## [1] "I have already computed the value for 4"
## [1] 3
fibonacci(5)
## [1] "Going to compute the value for 5"
## [1] "I have already computed the value for 4"
## [1] "I have already computed the value for 3"
## [1] "That turned out to be 5"
## $`2`
## [1] 1
## 
## $`1`
## [1] 1
## 
## $`3`
## [1] 2
## 
## $`4`
## [1] 3
## 
## $`5`
## [1] 5
## [1] 5
?fibonacci
## No documentation for 'fibonacci' in specified packages and libraries:
## you could try '??fibonacci'
### Ellipsis Parameters
g <- function(a, b, ...) NULL
g(a = 1, b = 2, c = 3)
## NULL
tolist <- function(...) list(...)
tolist()
## list()
tolist(a = 1)
## $a
## [1] 1
tolist(a = 1, b = 3)
## $a
## [1] 1
## 
## $b
## [1] 3
tolist(a = 1, b = 3, c = 4,d = 12, e = 23)
## $a
## [1] 1
## 
## $b
## [1] 3
## 
## $c
## [1] 4
## 
## $d
## [1] 12
## 
## $e
## [1] 23
### System Timeout
time_out <- function(f) {
 force(f)
 function(...) {
 system.time(f(...))
 }
}

ti_mean <- time_out(mean)
ti_mean(runif(1e6))
##    user  system elapsed 
##    0.03    0.02    0.03
# **Chapter 10**
## **Object Oriented Programming**
### **Bayesian Linear Models**
blm <- function(model, alpha = 1, beta = 1, ...) {
 # Here goes the mathematics for computing the fit.
 frame <- model.frame(model, ...)
 phi <- model.matrix(frame)
 no_params <- ncol(phi)
 target <- model.response(frame)
 covar <- solve(diag(alpha, no_params) + beta * t(phi) %*% phi)
 mean <- beta * covar %*% t(phi) %*% target
 list(formula = model,
 frame = frame,
 mean = mean,
 covar = covar)
}
# fake some data for our linear model
x <- rnorm(10)
a <- 1 ; b <- 1.3
w0 <- 0.2 ; w1 <- 3
y <- rnorm(10, mean = w0 + w1 * x, sd = sqrt(1/b))
#frame <- data.frame (x_1 = x, y_1 = y )
#frame

# fit a model
model <- blm(y ~ x, alpha = a, beta = b)

model
## $formula
## y ~ x
## 
## $frame
##             y          x
## 1  -3.2696915 -1.1369671
## 2  -4.3496517 -1.5037810
## 3  -3.8920238 -1.8113726
## 4  -0.6998128 -0.4769833
## 5  -5.3577295 -1.3622658
## 6  -4.1737824 -1.8095640
## 7  -4.0190159 -1.1597942
## 8  -7.9212743 -2.2600729
## 9   6.4632952  1.5249919
## 10  4.2227425  1.3204692
## 
## $mean
##                  [,1]
## (Intercept) 0.4022186
## x           3.1501395
## 
## $covar
##             (Intercept)          x
## (Intercept)  0.10168968 0.03756497
## x            0.03756497 0.04663170
class(model)
## [1] "list"
class(model) <- "blm"

class(model)
## [1] "blm"
blm <- function(model, alpha = 1, beta = 1, ...) {
 # stuff happens here...
 object <- list(formula = model,
 frame = frame,
 mean = mean,
 covar = covar)
 class(object) <- "blm"
 object
}

blm <- function(model, alpha = 1, beta = 1, ...) {
 # stuff happens here...
 structure(list(formula = model,
 frame = frame,
 mean = mean,
 covar = covar),
 class = "blm")
}

model
## $formula
## y ~ x
## 
## $frame
##             y          x
## 1  -3.2696915 -1.1369671
## 2  -4.3496517 -1.5037810
## 3  -3.8920238 -1.8113726
## 4  -0.6998128 -0.4769833
## 5  -5.3577295 -1.3622658
## 6  -4.1737824 -1.8095640
## 7  -4.0190159 -1.1597942
## 8  -7.9212743 -2.2600729
## 9   6.4632952  1.5249919
## 10  4.2227425  1.3204692
## 
## $mean
##                  [,1]
## (Intercept) 0.4022186
## x           3.1501395
## 
## $covar
##             (Intercept)          x
## (Intercept)  0.10168968 0.03756497
## x            0.03756497 0.04663170
## 
## attr(,"class")
## [1] "blm"
### Polymorphic Function
print.blm <- function(x, ...) {
 print(x$formula)
}
model
## y ~ x
#??print
### Defining your own Function
Charles <- function(x, ...) UseMethod("Charles")
Charles.default <- function(x, ...) print("default Charles")
Charles("a string")
## [1] "default Charles"
#Charles(12)

Charles.blm <- function(x, ...) print("blm Charles")
Charles(model)
## [1] "blm Charles"
Charles.blm <- function(x, upper = FALSE, ...) {
 if (upper) {
 print("BLM FOO")
 } else {
 print("blm foo")
 }
}

Charles("a string")
## [1] "default Charles"
Charles(model)
## [1] "blm foo"
Charles("a string", upper = TRUE)
## [1] "default Charles"
Charles(model, upper = TRUE)
## [1] "BLM FOO"
foo <- function(object, ...) UseMethod("foo")
foo.default <- function(object, ...) stop("foo not implemented")

bar <- function(object, ...) UseMethod("bar")
bar.default <- function(object, ...) stop("bar not implemented")


A <- function(f, b) structure(list(foo = f, bar = b), class ="A")

foo.A <- function(object, ...) paste("A::foo ->", object$foo)
bar.A <- function(object, ...) paste("A::bar ->", object$bar)

a <- A("Charles", "Nana")
foo(a)
## [1] "A::foo -> Charles"
bar(a)
## [1] "A::bar -> Nana"
baz <- function(object, ...) UseMethod("baz")
baz.default <- function(object, ...) stop("baz not implemented")
B <- function(f, b, bb) {
 a <- A(f, b)
 a$baz <- bb
 class(a) <- "B"
 a
}

bar.B <- function(object, ...) paste("B::bar ->", object$bar)
baz.B <- function(object, ...) paste("B::baz ->", object$baz)

B <- function(f, b, bb) {
 a <- A(f, b)
 a$baz <- bb
 class(a) <- c("B", "A")
 a
}

b <- B("Charles", "Kwame", "Appiah")

foo(b)
## [1] "A::foo -> Charles"
bar(b)
## [1] "B::bar -> Kwame"
baz(b)
## [1] "B::baz -> Appiah"
C <- function(f, b, bb) {
 b <- B(f, b, bb)
 class(b) <- c("C", "B", "A")
 b
}
c <- C("foo", "bar", "baz")
foo(c)
## [1] "A::foo -> foo"
bar(c)
## [1] "B::bar -> bar"
baz(c)
## [1] "B::baz -> baz"
C <- function(f, b, bb) {
 b <- B(f, b, bb)
 class(b) <- c("C", class(b))
 b
}
# **Chapter 12**
## Testing R packages
area <- function(x) UseMethod("area")

circumference <- function(x) UseMethod("circumference")

rectangle <- function(width, height) {
 structure(list(width = width, height = height),
 class = c("rectangle", "shape"))
}

area.rectangle <- function(x) x$height * x$width

circumference.rectangle <- function(x) 2 * x$height + 2 * x$width

r <- rectangle(width = 3, height = 4)
area(r)
## [1] 12
circumference(r)
## [1] 14
# Automating testing
r <- rectangle(width = 3, height = 4)
if (area(r) != 3*4) {
 stop("Area not computed correctly!")
}

if (circumference(r) != 2*3 + 2*4) {
 stop("Circumference not computed correctly!")
}
## **Chapter 14**
### Profiling and  Optimizing
library(profvis)
graph <- function(n, edges) {
 m <- matrix(0, nrow = n, ncol = n)
 no_edges <- length(edges)
 if (no_edges >= 1) {
 for (i in seq(1, no_edges, by = 2)) {
 m[edges[i], edges[i+1]] <- m[edges[i+1], edges[i]] <- 1
 }
 }
 structure(m, class = "graph")
}
smooth_weights <- function(graph, node_weights, alpha) {
 if (length(node_weights) != nrow(graph))
 stop("Incorrect number of nodes")
 no_nodes <- length(node_weights)
 new_weights <- vector("numeric", no_nodes)
 for (i in 1:no_nodes) {
 neighbour_weights <- 0
 n <- 0
 for (j in 1:no_nodes) {
 if (i != j && graph[i, j] == 1) {
 neighbour_weights <- neighbour_weights + node_weights[j]
 n <- n + 1
 }
 }
 if (n > 0) {
 new_weights[i] <-
 alpha * node_weights[i] +
 (1 - alpha) * neighbour_weights / n
 } else {
 new_weights[i] <- node_weights[i]
 }
 }
 new_weights
}
profvis::profvis({
 n <- 1000
 nodes <- 1:n
 edges <- sample(nodes, 600, replace = TRUE)
 weights <- rnorm(n)
 g <- graph(n, edges)
 smooth_weights(g, weights, 0.8)
})
graph <- function(n, edges) {
 neighbours <- vector("list", length = n)
 for (i in seq_along(neighbours)) {
 neighbours[[i]] <- vector("integer", length = 0)
 }
 no_edges <- length(edges)
 if (no_edges >= 1) {
 for (i in seq(1, no_edges, by = 2)) {
 n1 <- edges[i]
 n2 <- edges[i+1]
 neighbours[[n1]] <- c(n2, neighbours[[n1]])
 neighbours[[n2]] <- c(n1, neighbours[[n2]])
 }
 }
 for (i in seq_along(neighbours)) {
 neighbours[[i]] <- unique(neighbours[[i]])
 }
 structure(neighbours, class = "graph")
}
smooth_weights <- function(graph, node_weights, alpha) {
 if (length(node_weights) != length(graph))
 stop("Incorrect number of nodes")
 no_nodes <- length(node_weights)
 new_weights <- vector("numeric", no_nodes)
 for (i in 1:no_nodes) {
 neighbour_weights <- 0
 n <- 0
 for (j in graph[[i]]) {
 if (i != j) {
   neighbour_weights <- neighbour_weights + node_weights[j]
 n <- n + 1
 }
 }
 if (n > 0) {
 new_weights[i] <-
 alpha * node_weights[i] +
 (1 - alpha) * neighbour_weights / n
 } else {
 new_weights[i] <- node_weights[i]
 }
 }
 new_weights
}
profvis::profvis({
 n <- 10000
 nodes <- 1:n
 edges <- sample(nodes, 600, replace = TRUE)
 weights <- rnorm(n)
 g <- graph(n, edges)
 smooth_weights(g, weights, 0.8)
})
graph <- function(n, edges) {
 neighbours <- vector("list", length = n)
 for (i in seq_along(neighbours)) {
 neighbours[[i]] <- vector("integer", length = 0)
 }
 no_edges <- length(edges)
 if (no_edges >= 1) {
 sources <- seq(1, no_edges, by = 2)
 destinations <- seq(2, no_edges, by = 2)
 edge_matrix <- matrix(NA, nrow = length(sources), ncol = 2)
 edge_matrix[,1] <- edges[sources]
 edge_matrix[,2] <- edges[destinations]
 for (i in 1:nrow(edge_matrix)) {
 if (edge_matrix[i,1] > edge_matrix[i,2]) {
 edge_matrix[i,] <- c(edge_matrix[i,2], edge_matrix[i,1])
 }
 }
 edge_matrix <- unique(edge_matrix)
 for (i in seq(1, nrow(edge_matrix))) {
 n1 <- edge_matrix[i, 1]
 n2 <- edge_matrix[i, 2]
 neighbours[[n1]] <- c(n2, neighbours[[n1]])
 neighbours[[n2]] <- c(n1, neighbours[[n2]])
 }
 }
 structure(neighbours, class = "graph")
}
profvis::profvis({
 n <- 20000
 nodes <- 1:n
 edges <- sample(nodes, 50000, replace = TRUE)
 weights <- rnorm(n)
 g <- graph(n, edges)
 smooth_weights(g, weights, 0.8)
})
flow_weights_iteration <- function(graph, node_weights, alpha) {
 if (length(node_weights) != length(graph))
 stop("Incorrect number of nodes")
   no_nodes <- length(node_weights)
 new_weights <- vector("numeric", n)
 for (i in 1:no_nodes) {
 neighbour_weights <- 0
 n <- 0
 for (j in graph[[i]]) {
 if (i != j) {
 neighbour_weights <- neighbour_weights + node_weights[j]
 n <- n + 1
 }
 }
 if (n > 0) {
 new_weights[i] <- (alpha * node_weights[i] + (1 - alpha)
 * neighbour_weights / n)
 } else {
 new_weights[i] <- node_weights[i]
 }
 }
 new_weights
}
smooth_weights <- function(graph, node_weights, alpha, no_iterations) {
 new_weights <- node_weights
 replicate(no_iterations, {
 new_weights <- flow_weights_iteration(graph, new_weights, alpha)
 })
 new_weights
}
profvis::profvis({
 n <- 20000
 nodes <- 1:n
 edges <- sample(nodes, 100000, replace = TRUE)
 weights <- rnorm(n)
 g <- graph(n, edges)
 smooth_weights(g, weights, 0.8, 10)
})
smooth_weights <- function(graph, node_weights,
 alpha, no_iterations) {
 new_weights <- node_weights
 for (i in 1:no_iterations) {
 new_weights <-
 smooth_weights_iteration(graph, new_weights, alpha)
 }
 new_weights
}
### Increasing Speed
library(microbenchmark)

mysum <- function(sequence) {
 s <- 0
 for (x in sequence) s <- s + x
 s
}
microbenchmark(
 sum(1:10),
 mysum(1:10)
)
## Unit: nanoseconds
##         expr  min   lq  mean median   uq     max neval cld
##    sum(1:10)  300  400   417    400  400    2200   100   a
##  mysum(1:10) 1100 1100 38814   1200 1200 3763600   100   a
microbenchmark(
 sum(1:10),
 mysum(1:10),
 Reduce(`+`, 1:10)
)
## Unit: nanoseconds
##               expr  min    lq  mean median    uq   max neval cld
##          sum(1:10)  300   500   756    700   900  3400   100  a 
##        mysum(1:10) 1200  1500  2152   2200  2500  6100   100  a 
##  Reduce(`+`, 1:10) 8700 12400 14948  14200 15250 68000   100   b
x <- sample(LETTERS, 1000, replace = TRUE)
microbenchmark(
 factor(x, levels = LETTERS),
 factor(x)
)
## Unit: microseconds
##                         expr   min    lq    mean median    uq   max neval cld
##  factor(x, levels = LETTERS)  27.5  28.8  36.113  29.85  36.9 131.9   100  a 
##                    factor(x) 114.9 120.9 136.633 128.35 143.5 269.8   100   b
x <- rnorm(1000)
names(x) <- paste("n", 1:1000)
microbenchmark(
 unlist(Map(function(x) x**2, x), use.names = FALSE),
 unlist(Map(function(x) x**2, x))
)
## Unit: microseconds
##                                                expr    min     lq     mean
##  unlist(Map(function(x) x^2, x), use.names = FALSE)  941.1 1068.4 1380.142
##                     unlist(Map(function(x) x^2, x)) 1012.4 1132.4 1358.209
##   median      uq    max neval cld
##  1157.65 1508.65 7638.5   100   a
##  1230.05 1545.60 2174.0   100   a
smooth_weights_iteration_map <- function(graph, node_weights, alpha) {
 if (length(node_weights) != length(graph))
 stop("Incorrect number of nodes")
handle_i <- function(i) {
 neighbour_weights <- 0
 n <- 0
 for (j in graph[[i]]) {
 if (i != j) {
 neighbour_weights <- neighbour_weights + node_weights[j]
 n <- n + 1
 }
 }
 if (n > 0) {
 alpha * node_weights[i] + (1 - alpha) * neighbour_weights / n
 } else {
 node_weights[i]
 }
 }
 unlist(Map(handle_i, 1:length(node_weights)))
}

library(cluster) test_rmse <- function(data) { model <- data\(training %>% lm(dist ~ speed, data = .) predictions <- data\)test %>% predict(model, data = .) rmse(data\(test\)dist, predictions) } sample_rmse <- function (n) { random_cars <- cars %>% partition(n, c(training = 0.6, test = 0.4)) unlist(Map(test_rmse, random_cars)) } sample_rmse_parallel <- function (n) { random_cars <- cars %>% partition(n, c(training = 0.6, test = 0.4)) unlist(clustermq(cl, test_rmse, random_cars)) }

microbenchmark( sample_rmse(10), sample_rmse_parallel(10), times = 5 )

microbenchmark( sample_rmse(1000), sample_rmse_parallel(1000), times = 5 )

library(Rcpp)
cppFunction("
NumericVector
smooth_weights_iteration_cpp(List g,
 NumericVector node_weights,
double alpha)
{
 NumericVector new_weights(g.length());
 for (int i = 0; i < g.length(); ++i) {
 // The body here is just the C++ code
 // shown above...
 }
 return new_weights;
}
")
smooth_weights_cpp <- function(graph, node_weights,
 alpha, no_iterations) {
 new_weights <- node_weights
 for (i in 1:no_iterations) {
new_weights <-
 smooth_weights_iteration_cpp(graph, new_weights, alpha)
 }
 new_weights
}

library(microbenchmark) microbenchmark( smooth_weights(g, weights, 0.8, 10), smooth_weights_cpp(g, weights, 0.8, 10), times = 5 )

library(rstudioapi)

viewer <- getOption("viewer")
viewer("R-with Data Science.Rmd", height = "maximize")
## NULL
#rstudioapi::navigateToFile("")